path: root/lib/debug_locks.c
blob: 06d3135bd184c498ae02a60a9da4b5ba502e442d (plain)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * lib/debug_locks.c
 *
 * Generic place for common debugging facilities for various locks:
 * spinlocks, rwlocks, mutexes and rwsems.
 *
 * Started by Ingo Molnar:
 *
 *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 */
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/debug_locks.h>

/*
 * We want to turn all lock-debugging facilities on/off at once,
 * via a global flag. The reason is that once a single bug has been
 * detected and reported, there might be a cascade of followup bugs
 * that would just muddy the log. So we report the first one and
 * shut up after that.
 */
int debug_locks __read_mostly = 1;
EXPORT_SYMBOL_GPL(debug_locks);

/*
 * The locking-testsuite uses <debug_locks_silent> to get a
 * 'silent failure': nothing is printed to the console when
 * a locking bug is detected.
 */
int debug_locks_silent __read_mostly;
EXPORT_SYMBOL_GPL(debug_locks_silent);
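
/*
 * Illustrative sketch only -- hypothetical helper, not part of this
 * file: a locking self-test flips debug_locks_silent around a test
 * case that deliberately triggers a locking bug, so the expected
 * splat never reaches the console, then re-arms debugging.
 */
#if 0
static void example_run_expected_failure(void (*testcase)(void))
{
	debug_locks_silent = 1;
	testcase();		/* expected to trip the lock-debugging checks */
	debug_locks_silent = 0;
	debug_locks = 1;	/* re-arm debugging for the next test */
}
#endif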

/*
 * Generic 'turn off all lock debugging' function:
 */
noinstr int debug_locks_off(void)
{
	if (debug_locks && __debug_locks_off()) {
		if (!debug_locks_silent) {
			console_verbose();
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL_GPL(debug_locks_off);
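
/*
 * Illustrative sketch only -- not part of the upstream file.  The
 * helper name below is hypothetical; it shows the intended calling
 * convention for debug_locks_off(): report a bug only if this is the
 * first one detected and we are not running in silent mode.
 */
#if 0
static void example_report_locking_bug(const char *msg)
{
	/* Non-zero only for the first bug, and only when not silent. */
	if (!debug_locks_off())
		return;

	printk(KERN_ERR "lock debugging: %s\n", msg);
	dump_stack();
}
#endif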
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h163
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/Makefile117
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.c1474
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_link_encoder.h659
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c1627
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.h749
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.c502
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_link_encoder.h366
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.c661
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn20/dcn20_stream_encoder.h119
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.c257
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_link_encoder.h86
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.c899
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn30/dcn30_dio_stream_encoder.h331
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.c191
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn301/dcn301_dio_link_encoder.h82
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c682
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.h290
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.c506
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn314/dcn314_dio_stream_encoder.h355
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.c328
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_link_encoder.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.c493
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn32/dcn32_dio_stream_encoder.h206
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.c190
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn321/dcn321_dio_link_encoder.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c391
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h188
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.c520
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_stream_encoder.h332
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.c322
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_link_encoder.h134
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c856
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.h240
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_cp_psp.h61
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_event_log.h39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_helpers.h229
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_pp_smu.h321
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services.h317
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_services_types.h304
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile143
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.c1933
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_auto.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calc_math.c145
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c1619
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dc_features.h566
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c159
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c2559
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h89
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c5115
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c5231
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c1665
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c1666
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c6150
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c1786
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c739
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c6626
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c1778
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.h69
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c486
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.c367
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn302/dcn302_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.c380
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn303/dcn303_fpu.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c827
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c7228
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c1591
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.h69
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c432
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c7343
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c1678
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c3613
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h80
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c3762
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c6350
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h1170
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c614
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c931
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c620
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c639
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h311
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c320
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h109
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h741
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c1146
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h1261
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c382
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c1922
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.h66
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h160
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_logger.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h704
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.c260
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dsc/rc_calc_fpu.h90
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/Makefile140
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/cmntypes.h94
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.c10337
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core.h204
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_core_structs.h2032
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_lib_defines.h79
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.c798
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/display_mode_util.h74
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.c929
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_translation_helper.h28
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.c516
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_utils.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.c466
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/dml21_wrapper.h135
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/bounding_boxes/dcn4_soc_bb.h372
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml2_external_lib_deps.h10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_dchub_registers.h191
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_display_cfg_types.h526
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_policy_types.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_soc_parameter_types.h215
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/inc/dml_top_types.h744
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.c661
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.c13342
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_dcn4_calcs.h39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.c39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_shared_types.h2341
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.c788
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_core/dml2_core_utils.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c785
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_dpmm/dml2_dpmm_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.c198
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_dcn4.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.c39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_mcg/dml2_mcg_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.c706
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn3.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c2390
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.c83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_pmo/dml2_pmo_factory.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.c147
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_standalone_libraries/lib_float_math.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_interfaces.c49
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_legacy.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.c1170
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/dml2_top/dml2_top_soc15.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_debug.h189
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml21/src/inc/dml2_internal_shared_types.h1010
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.c1174
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_resource_mgmt.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_dc_types.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_internal_types.h157
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.c911
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_mall_phantom.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.c311
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_policy.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.c1528
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_translation_helper.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.c560
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_utils.h149
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.c704
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml2_wrapper.h309
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_assert.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_depedencies.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.c573
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_display_rq_dlg_calc.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2_0/dml_logging.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/Makefile83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.c607
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp.h1530
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_cm.c882
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn10/dcn10_dpp_dscl.c696
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.c491
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp.h788
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn20/dcn20_dpp_cm.c1202
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.c324
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn201/dcn201_dpp.h83
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c1543
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.h647
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp_cm.c461
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.c239
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn32/dcn32_dpp.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.c150
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn35/dcn35_dpp.h67
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.c429
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp.h740
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c242
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c1186
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/Makefile39
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c1459
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.c776
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn20/dcn20_dsc.h619
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.c142
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn35/dcn35_dsc.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c394
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h346
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dsc.h120
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dscc_types.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc.c64
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/rc_calc_dpi.c122
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/Makefile46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h74
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c272
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h919
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb_cm.c395
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.c57
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn35/dcn35_dwb.h61
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/Makefile134
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce110/hw_factory_dce110.c180
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce110/hw_factory_dce110.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce110/hw_translate_dce110.c387
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce110/hw_translate_dce110.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_factory_dce120.c198
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_factory_dce120.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_translate_dce120.c409
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce120/hw_translate_dce120.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce60/hw_factory_dce60.c175
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce60/hw_factory_dce60.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce60/hw_translate_dce60.c411
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce60/hw_translate_dce60.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce80/hw_factory_dce80.c175
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce80/hw_factory_dce80.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce80/hw_translate_dce80.c411
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dce80/hw_translate_dce80.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_factory_dcn10.c231
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_factory_dcn10.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_translate_dcn10.c409
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn10/hw_translate_dcn10.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.c261
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_factory_dcn20.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.c383
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn20/hw_translate_dcn20.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.c240
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_factory_dcn21.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.c373
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn21/hw_translate_dcn21.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.c269
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_factory_dcn30.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_translate_dcn30.c388
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn30/hw_translate_dcn30.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_factory_dcn315.c260
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_factory_dcn315.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_translate_dcn315.c374
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn315/hw_translate_dcn315.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c271
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_translate_dcn32.c349
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_translate_dcn32.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.c264
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_factory_dcn401.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.c335
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/dcn401/hw_translate_dcn401.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/ddc_regs.h201
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/generic_regs.h66
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c352
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_regs.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c675
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h61
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hpd_regs.h79
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c250
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.h49
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c125
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.h77
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_generic.c129
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_generic.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_gpio.c203
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_gpio.h144
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c149
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.h49
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c126
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/Makefile28
-rw-r--r--drivers/gpu/drm/amd/display/dc/hdcp/hdcp_msg.c425
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/Makefile50
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.c628
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_link_encoder.h235
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.c780
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn31/dcn31_hpo_dp_stream_encoder.h245
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.c89
-rw-r--r--drivers/gpu/drm/amd/display/dc/hpo/dcn32/dcn32_hpo_dp_link_encoder.h68
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/Makefile104
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.c946
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h518
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.c692
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h144
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.c107
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn201/dcn201_hubbub.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.c723
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn21/dcn21_hubbub.h158
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c489
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.h138
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.c84
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn301/dcn301_hubbub.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c1096
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.h147
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c1062
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.h166
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c664
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.h172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c1271
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h206
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/Makefile97
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c1421
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h955
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c1725
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h420
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c156
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.h132
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c862
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.h134
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c678
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h312
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c134
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.h253
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c246
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c244
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h76
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c1097
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h373
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/Makefile202
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.c219
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce/dce_hwseq.h1303
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce100/dce100_hwseq.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c3503
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h125
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.c160
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce112/dce112_hwseq.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.c270
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce120/dce120_hwseq.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.c433
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce60/dce60_hwseq.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.c55
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce80/dce80_hwseq.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c4147
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.h221
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.c127
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c3256
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h174
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.c146
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.c615
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_hwseq.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.c136
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn201/dcn201_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c302
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.c151
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.c1282
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_hwseq.h106
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c156
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.c42
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_hwseq.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c152
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.c223
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_hwseq.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn302/dcn302_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.c64
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_hwseq.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn303/dcn303_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c717
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c158
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c603
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c165
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c1848
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h135
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c174
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c1728
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h112
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c184
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/Makefile28
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.c182
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_hwseq.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c4045
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h213
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c177
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h2025
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h242
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/bw_fixed.h166
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/clock_source.h191
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/compressor.h137
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_status.h70
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h707
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/custom_float.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h489
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h649
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/abm.h71
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/audio.h66
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/aux_engine.h187
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h371
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h474
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/cursor_reg_cache.h121
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h338
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h296
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h96
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h374
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h240
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/gpio.h96
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h316
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h419
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h114
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h288
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mcif_wb.h109
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h194
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h1141
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/opp.h386
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/optc.h197
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h72
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h55
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h365
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h516
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/transform.h293
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/vmid.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/vpg.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h120
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_hwss.h89
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link_service.h350
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/reg_helper.h558
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/resource.h663
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/soc_and_ip_translator.h24
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/vm_helper.h48
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/Makefile200
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c430
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c268
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.c366
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce60/irq_service_dce60.h40
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c278
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c380
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c385
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c339
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c413
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c422
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.c387
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn302/irq_service_dcn302.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c283
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.c404
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn31/irq_service_dcn31.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.c406
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn314/irq_service_dcn314.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.c411
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn315/irq_service_dcn315.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.c436
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn32/irq_service_dcn32.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.c400
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn35/irq_service_dcn35.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.c382
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn351/irq_service_dcn351.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.c381
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn36/irq_service_dcn36.h12
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.c414
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn401/irq_service_dcn401.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.c243
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/irq_service.h93
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq_types.h237
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/Makefile64
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c1019
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.c172
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_trace.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c353
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c199
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c176
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.c228
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_dp.h62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c233
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.c1644
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.c2674
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.h53
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.c916
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.h31
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_resource.c114
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_resource.h32
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.c626
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c604
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.h106
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c2599
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.h117
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c171
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c448
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h112
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c548
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c210
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c1816
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.h201
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.c267
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_128b_132b.h43
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c491
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.h62
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.c80
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_auxless.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c1046
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.h60
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c553
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.c250
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dpcd.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c1426
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h81
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.c240
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_hpd.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/Makefile54
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.c324
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn20/dcn20_mmhubbub.h517
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.c239
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn32/dcn32_mmhubbub.h211
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/mmhubbub/dcn35/dcn35_mmhubbub.h75
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/Makefile72
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.c521
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn10/dcn10_mpc.h200
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.c610
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn20/dcn20_mpc.h312
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c1592
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h1110
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.c1052
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn32/dcn32_mpc.h402
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.c635
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn401/dcn401_mpc.h257
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/Makefile51
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.c425
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn10/dcn10_opp.h193
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.c428
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn20/dcn20_opp.h178
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.c66
-rw-r--r--drivers/gpu/drm/amd/display/dc/opp/dcn35/dcn35_opp.h69
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/Makefile114
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c1683
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h686
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.c580
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn20/dcn20_optc.h128
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.c198
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn201/dcn201_optc.h71
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.c438
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn30/dcn30_optc.h368
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.c190
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn301/dcn301_optc.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.c526
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn31/dcn31_optc.h279
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.c275
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn314/dcn314_optc.h262
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c384
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.h195
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c538
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.h83
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c552
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.h196
-rw-r--r--drivers/gpu/drm/amd/display/dc/os_types.h94
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/Makefile35
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.c573
-rw-r--r--drivers/gpu/drm/amd/display/dc/pg/dcn35/dcn35_pg_cntl.h195
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/Makefile225
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.c1224
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce100/dce100_resource.h63
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.c1557
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce110/dce110_resource.h54
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.c1437
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce112/dce112_resource.h57
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.c1310
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce120/dce120_resource.h39
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.c1503
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce60/dce60_resource.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.c1518
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dce80/dce80_resource.h47
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c1695
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h57
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c2782
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.h171
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c1318
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c1720
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.h56
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c2636
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.h108
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c1742
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c1526
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c1458
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.h38
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c2273
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h106
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c2159
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.h50
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c2181
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c2048
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.h44
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c2931
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.h1281
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource_helpers.c780
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c2078
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.h45
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c2223
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h315
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c2196
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c2196
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.h73
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c2282
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h656
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.c304
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn401/dcn401_soc_and_ip_translator.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.c27
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/dcn42/dcn42_soc_and_ip_translator.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/soc_and_ip_translator/soc_and_ip_translator.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/Makefile33
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.c1925
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h27
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_filters.h15
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.c553
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_isharp_filters.h42
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.c2586
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_easf_filters.h37
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.c1233
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_scl_filters.h13
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/dc_spl_types.h560
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.c151
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_custom_float.h29
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_debug.h30
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c493
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.h522
-rw-r--r--drivers/gpu/drm/amd/display/dc/sspl/spl_os_types.h56
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/Makefile30
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c132
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.h34
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c56
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c174
-rw-r--r--drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.h39
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h1101
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv_stat.h41
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h7467
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_trace_buffer.h68
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/Makefile34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c479
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h252
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c56
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.h35
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c202
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.h50
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn301.c58
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn301.h37
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn302.c58
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn302.h37
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c59
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.h38
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c496
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h258
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.c67
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn314.h35
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn315.c62
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn315.h68
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn316.c62
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn316.h33
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c560
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h273
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c611
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h290
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.c34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn351.h13
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.c34
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn36.h13
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c678
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h290
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_reg.c109
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_reg.h123
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c1383
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv_stat.c128
-rw-r--r--drivers/gpu/drm/amd/display/include/audio_types.h121
-rw-r--r--drivers/gpu/drm/amd/display/include/bios_parser_interface.h44
-rw-r--r--drivers/gpu/drm/amd/display/include/bios_parser_types.h350
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h314
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_types.h71
-rw-r--r--drivers/gpu/drm/amd/display/include/ddc_service_types.h134
-rw-r--r--drivers/gpu/drm/amd/display/include/dpcd_defs.h202
-rw-r--r--drivers/gpu/drm/amd/display/include/fixed31_32.h540
-rw-r--r--drivers/gpu/drm/amd/display/include/gpio_interface.h109
-rw-r--r--drivers/gpu/drm/amd/display/include/gpio_service_interface.h124
-rw-r--r--drivers/gpu/drm/amd/display/include/gpio_types.h332
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_ctrl_defs.h443
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_defs.h196
-rw-r--r--drivers/gpu/drm/amd/display/include/grph_object_id.h320
-rw-r--r--drivers/gpu/drm/amd/display/include/hdcp_msg_types.h108
-rw-r--r--drivers/gpu/drm/amd/display/include/irq_service_interface.h51
-rw-r--r--drivers/gpu/drm/amd/display/include/link_service_types.h263
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_interface.h147
-rw-r--r--drivers/gpu/drm/amd/display/include/logger_types.h83
-rw-r--r--drivers/gpu/drm/amd/display/include/set_mode_types.h102
-rw-r--r--drivers/gpu/drm/amd/display/include/signal_types.h168
-rw-r--r--drivers/gpu/drm/amd/display/include/vector.h150
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/Makefile31
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c2014
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h118
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_table.c64
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_table.h47
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/luts_1d.h51
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/Makefile31
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c1294
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/Makefile33
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c599
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h586
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c544
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_transition.c322
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c902
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_transition.c704
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c763
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.c243
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_log.h140
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.c1031
-rw-r--r--drivers/gpu/drm/amd/display/modules/hdcp/hdcp_psp.h532
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h146
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h376
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h80
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_shared.h108
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_stats.h75
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_vmid.h46
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/Makefile31
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c579
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/Makefile31
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.c1079
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.h89
-rw-r--r--drivers/gpu/drm/amd/display/modules/vmid/vmid.c174
-rw-r--r--drivers/gpu/drm/amd/include/aldebaran_ip_offset.h1738
-rw-r--r--drivers/gpu/drm/amd/include/amd_acpi.h495
-rw-r--r--drivers/gpu/drm/amd/include/amd_cper.h269
-rw-r--r--drivers/gpu/drm/amd/include/amd_pcie.h86
-rw-r--r--drivers/gpu/drm/amd/include/amd_pcie_helpers.h141
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h483
-rw-r--r--drivers/gpu/drm/amd/include/amdgpu_reg_state.h153
-rw-r--r--drivers/gpu/drm/amd/include/arct_ip_offset.h1648
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_1_0_offset.h453
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_1_0_sh_mask.h2045
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_1_8_0_offset.h411
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_1_8_0_sh_mask.h1807
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_2_0_0_default.h272
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_2_0_0_offset.h514
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_2_0_0_sh_mask.h2264
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_2_1_0_offset.h523
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_2_1_0_sh_mask.h2378
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_3_0_0_offset.h259
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_3_0_0_sh_mask.h1246
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_offset.h287
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/athub/athub_4_1_0_sh_mask.h1348
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_3_0_d.h661
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_3_0_sh_mask.h8127
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_d.h922
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_4_1_sh_mask.h10252
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h1071
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_enum.h1198
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_sh_mask.h11496
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_1_d.h3577
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_1_enum.h1068
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_1_sh_mask.h33080
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_10_0_2_offset.h56
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_10_0_2_sh_mask.h73
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_0_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_0_sh_mask.h38
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_1_offset.h32
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_1_sh_mask.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_5_0_offset.h50
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_5_0_sh_mask.h70
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_d.h7358
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_enum.h1773
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_10_0_sh_mask.h16653
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_d.h7658
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_enum.h6129
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_0_sh_mask.h17567
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_d.h10084
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_enum.h6813
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_11_2_sh_mask.h18695
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h18209
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h64798
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_d.h4540
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_6_0_sh_mask.h9948
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h5712
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h1117
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h13127
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h14114
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h54345
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_offset.h17539
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_0_sh_mask.h68033
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h6193
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h22091
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_offset.h13875
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_1_0_sh_mask.h56648
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h18022
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h71029
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_offset.h13271
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_1_sh_mask.h53361
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_offset.h16273
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_2_sh_mask.h62433
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_offset.h8471
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_3_sh_mask.h35366
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_offset.h15089
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h60782
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_4_offset.h15245
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_4_sh_mask.h61832
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_offset.h15195
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_5_sh_mask.h62071
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_offset.h15686
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_1_6_sh_mask.h62727
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_offset.h14737
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_0_sh_mask.h222948
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_1_offset.h14596
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_2_1_sh_mask.h56598
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_offset.h15279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_0_sh_mask.h53485
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_1_offset.h15259
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_5_1_sh_mask.h53464
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_6_0_offset.h15485
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_6_0_sh_mask.h61940
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_offset.h16662
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h145870
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h26
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h52
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h26
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h85
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h197
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h28
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_3_offset.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_4_3_sh_mask.h157
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_0_offset.h647
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_0_sh_mask.h3912
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_3_offset.h151
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_3_sh_mask.h952
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_1_0_offset.h565
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_1_0_sh_mask.h3430
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_0_offset.h604
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_0_sh_mask.h3574
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_3_offset.h204
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_0_3_sh_mask.h1194
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_1_4_offset.h7215
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_3_1_4_sh_mask.h55194
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_offset.h11973
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_0_sh_mask.h103385
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_2_offset.h11957
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_2_sh_mask.h103633
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_3_offset.h11969
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_4_2_3_sh_mask.h136141
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_default.h6028
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h11375
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h44165
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h7275
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h13609
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h49369
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_default.h6114
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h11685
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_sh_mask.h41664
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_3_offset.h12094
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_3_sh_mask.h44690
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_offset.h10002
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_5_0_sh_mask.h36579
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_12_0_0_offset.h11061
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_12_0_0_sh_mask.h40550
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h3866
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h7279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h30033
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h7483
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_sh_mask.h31176
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_2_1_offset.h7503
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_2_1_sh_mask.h31186
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h266
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_sh_mask.h764
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_offset.h7687
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_2_sh_mask.h33003
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_offset.h7450
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_3_sh_mask.h31649
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h1786
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_sh_mask.h12821
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_0_d.h2532
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_d.h2557
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h6280
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_sh_mask.h18444
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h2829
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_enum.h6858
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h20836
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_1_d.h2791
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_1_enum.h6808
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_1_sh_mask.h21368
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_d.h1274
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_6_0_sh_mask.h11899
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_7_0_d.h657
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_7_0_sh_mask.h6116
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_7_1_d.h1464
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_7_1_sh_mask.h14416
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_8_1_d.h1708
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_8_1_enum.h1198
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_8_1_sh_mask.h15682
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_8_2_d.h910
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_8_2_enum.h1068
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gmc/gmc_8_2_sh_mask.h7850
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_offset.h209
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_0_sh_mask.h603
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_4_2_offset.h219
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_4_4_2_sh_mask.h663
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_5_0_0_offset.h217
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_5_0_0_sh_mask.h659
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_5_2_1_offset.h217
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_5_2_1_sh_mask.h684
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_6_0_0_offset.h209
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_6_0_0_sh_mask.h646
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_7_0_0_offset.h219
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/hdp/hdp_7_0_0_sh_mask.h735
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_6_0_0_offset.h391
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_6_0_0_sh_mask.h1439
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_7_0_0_offset.h388
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/lsdma/lsdma_7_0_0_sh_mask.h1411
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_default.h1011
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h2006
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_sh_mask.h10249
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_7_offset.h5125
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_7_sh_mask.h32178
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_8_0_offset.h3366
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_8_0_sh_mask.h22628
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_2_0_0_default.h927
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_2_0_0_offset.h1799
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_2_0_0_sh_mask.h7567
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_2_3_0_default.h1253
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_2_3_0_offset.h2439
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_2_3_0_sh_mask.h10331
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_0_offset.h1529
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_0_sh_mask.h7478
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_1_offset.h1769
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_1_sh_mask.h7483
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_2_offset.h1425
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_0_2_sh_mask.h7228
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_3_0_offset.h1395
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_3_3_0_sh_mask.h6722
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_4_1_0_offset.h1341
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_4_1_0_sh_mask.h6943
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_1_offset.h1999
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_1_sh_mask.h9790
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_3_0_offset.h1991
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_3_0_sh_mask.h10265
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_default.h3933
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_offset.h7789
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_1_sh_mask.h45068
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_10_0_default.h182
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_10_0_offset.h336
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_10_0_sh_mask.h886
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_offset.h352
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_sh_mask.h355
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_offset.h365
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_sh_mask.h975
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_5_0_offset.h400
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_5_0_sh_mask.h942
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_12_0_0_offset.h336
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_12_0_0_sh_mask.h866
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_0_offset.h461
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_0_sh_mask.h682
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_offset.h409
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_2_sh_mask.h631
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_4_offset.h402
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_4_sh_mask.h595
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_5_offset.h455
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_5_sh_mask.h672
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_6_offset.h456
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_6_sh_mask.h702
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_8_offset.h410
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_13_0_8_sh_mask.h603
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_0_offset.h359
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_0_sh_mask.h534
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_2_offset.h468
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_14_0_2_sh_mask.h692
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_9_0_offset.h375
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/mp/mp_9_0_sh_mask.h1463
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_offset.h1690
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_1_sh_mask.h10281
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_3_1_offset.h11287
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbif/nbif_6_3_1_sh_mask.h32806
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_2_3_default.h18521
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_2_3_offset.h14663
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_2_3_sh_mask.h120339
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_4_3_0_offset.h17381
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_4_3_0_sh_mask.h82050
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_default.h22340
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_offset.h3651
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_sh_mask.h133888
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h61
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_default.h14865
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_offset.h4642
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_sh_mask.h118975
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h63
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h9406
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h57899
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_offset.h31871
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_2_0_sh_mask.h152483
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h68
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_offset.h4631
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_sh_mask.h48482
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h29660
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h154426
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_9_0_offset.h10004
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_9_0_sh_mask.h38900
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h292
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_sh_mask.h1120
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_2_0_d.h642
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_2_0_sh_mask.h2476
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_2_4_d.h471
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_2_4_enum.h1340
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_2_4_sh_mask.h2544
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_3_0_1_d.h593
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_3_0_1_enum.h1464
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_3_0_1_sh_mask.h3558
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_3_0_d.h688
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_3_0_enum.h1497
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/oss_3_0_sh_mask.h3660
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_1_offset.h337
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_1_sh_mask.h1249
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_offset.h327
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_0_sh_mask.h1204
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h351
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h1311
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_4_2_offset.h263
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_4_2_sh_mask.h995
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_5_0_0_offset.h353
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_5_0_0_sh_mask.h1305
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_0_0_offset.h267
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_0_0_sh_mask.h979
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_1_0_offset.h279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_6_1_0_sh_mask.h1019
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_7_0_0_offset.h279
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/oss/osssys_7_0_0_sh_mask.h1029
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pcie/pcie_6_1_0_offset.h630
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pcie/pcie_6_1_0_sh_mask.h4250
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_offset.h27
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/pwr/pwr_10_0_sh_mask.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_offset.h27
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/rsmu/rsmu_0_0_2_sh_mask.h32
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_0_offset.h5224
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_0_sh_mask.h13922
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_offset.h1113
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma/sdma_4_4_2_sh_mask.h3300
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_0_default.h286
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_0_offset.h547
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_0_sh_mask.h1852
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_1_default.h242
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_1_offset.h459
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_1_sh_mask.h1658
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_2_2_offset.h1051
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_2_2_sh_mask.h3002
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_2_offset.h1047
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_2_sh_mask.h2992
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma1/sdma1_4_0_default.h282
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma1/sdma1_4_0_offset.h539
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma1/sdma1_4_0_sh_mask.h1810
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma1/sdma1_4_2_2_offset.h1043
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma1/sdma1_4_2_2_sh_mask.h2956
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma1/sdma1_4_2_offset.h1039
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma1/sdma1_4_2_sh_mask.h2948
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma2/sdma2_4_2_2_offset.h1043
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma2/sdma2_4_2_2_sh_mask.h2956
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma3/sdma3_4_2_2_offset.h1043
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma3/sdma3_4_2_2_sh_mask.h2956
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma4/sdma4_4_2_2_offset.h1043
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma4/sdma4_4_2_2_sh_mask.h2956
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma5/sdma5_4_2_2_offset.h1043
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma5/sdma5_4_2_2_sh_mask.h2956
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma6/sdma6_4_2_2_offset.h1043
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma6/sdma6_4_2_2_sh_mask.h2956
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma7/sdma7_4_2_2_offset.h1043
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/sdma7/sdma7_4_2_2_sh_mask.h2956
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_d.h192
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_6_0_sh_mask.h897
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_0_d.h741
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_0_sh_mask.h3842
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h1316
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h5462
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_0_d.h1344
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_0_enum.h1191
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_0_sh_mask.h5648
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_d.h1126
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_enum.h1205
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_sh_mask.h4866
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h1278
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_enum.h1246
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h5838
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h1252
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_enum.h1282
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h6087
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_8_0_d.h671
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_8_0_enum.h1072
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smu/smu_8_0_sh_mask.h2964
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_offset.h102
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_10_0_2_sh_mask.h184
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_offset.h507
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_0_sh_mask.h1096
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_6_offset.h35
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_11_0_6_sh_mask.h41
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_offset.h30
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_12_0_0_sh_mask.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_2_offset.h516
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_2_sh_mask.h1163
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_3_offset.h177
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_3_sh_mask.h428
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_6_offset.h517
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_13_0_6_sh_mask.h1178
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_14_0_2_offset.h511
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_14_0_2_sh_mask.h1106
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_offset.h181
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/smuio/smuio_9_0_sh_mask.h264
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_10_0_default.h141
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_10_0_offset.h257
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_10_0_sh_mask.h885
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_offset.h58
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_11_0_2_sh_mask.h99
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_13_0_2_offset.h346
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_13_0_2_sh_mask.h1297
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_14_0_2_offset.h228
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_14_0_2_sh_mask.h940
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_9_0_default.h194
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_9_0_offset.h363
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/thm/thm_9_0_sh_mask.h1314
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_12_0_0_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_12_0_0_sh_mask.h95
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_0_default.h31
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_0_offset.h52
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_0_sh_mask.h36
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_1_sh_mask.h91
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_1_2_sh_mask.h91
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_7_0_offset.h2626
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_6_7_0_sh_mask.h10796
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h35
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h97
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h29
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_7_0_offset.h33
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_7_0_sh_mask.h100
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_3_1_d.h98
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_3_1_sh_mask.h804
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_d.h96
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_0_sh_mask.h793
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_2_d.h96
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_4_2_sh_mask.h800
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_5_0_d.h115
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_5_0_enum.h1211
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_5_0_sh_mask.h1046
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h134
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_enum.h1081
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_sh_mask.h1034
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_7_0_offset.h225
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_7_0_sh_mask.h831
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_1_0_d.h69
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_1_0_sh_mask.h109
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_2_0_d.h68
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_2_0_sh_mask.h104
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_3_0_d.h73
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_3_0_sh_mask.h120
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_4_0_default.h122
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_4_0_offset.h208
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vce/vce_4_0_sh_mask.h488
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h422
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_sh_mask.h1358
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_0_0_offset.h1008
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_0_0_sh_mask.h3815
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h1005
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h3660
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_6_0_offset.h1462
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_6_0_sh_mask.h4535
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_offset.h1542
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h5530
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_0_offset.h2032
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_0_sh_mask.h8937
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_offset.h2367
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_3_sh_mask.h10919
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_5_offset.h1797
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_4_0_5_sh_mask.h8614
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_offset.h1694
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_5_0_0_sh_mask.h7666
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vpe/vpe_6_1_0_offset.h1553
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/vpe/vpe_6_1_0_sh_mask.h4393
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/wafl/wafl2_4_0_0_sh_mask.h69
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/wafl/wafl2_4_0_0_smn.h29
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/xgmi/xgmi_4_0_0_sh_mask.h69
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/xgmi/xgmi_4_0_0_smn.h29
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/xgmi/xgmi_6_1_0_sh_mask.h87
-rw-r--r--drivers/gpu/drm/amd/include/atom-bits.h48
-rw-r--r--drivers/gpu/drm/amd/include/atom-names.h100
-rw-r--r--drivers/gpu/drm/amd/include/atom-types.h42
-rw-r--r--drivers/gpu/drm/amd/include/atombios.h9309
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h4638
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmwareid.h86
-rw-r--r--drivers/gpu/drm/amd/include/beige_goby_ip_offset.h1272
-rw-r--r--drivers/gpu/drm/amd/include/cgs_common.h176
-rw-r--r--drivers/gpu/drm/amd/include/cik_structs.h292
-rw-r--r--drivers/gpu/drm/amd/include/cyan_skillfish_ip_offset.h712
-rw-r--r--drivers/gpu/drm/amd/include/dimgrey_cavefish_ip_offset.h1047
-rw-r--r--drivers/gpu/drm/amd/include/discovery.h449
-rw-r--r--drivers/gpu/drm/amd/include/displayobject.h249
-rw-r--r--drivers/gpu/drm/amd/include/dm_pp_interface.h194
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/dcn/irqsrcs_dcn_1_0.h1138
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_10_1.h53
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_11_0_0.h80
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_12_0_0.h74
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/gfx/irqsrcs_gfx_9_0.h55
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/isp/irqsrcs_isp_4_1.h62
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h299
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/nbio/irqsrcs_nbif_7_4.h42
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/sdma0/irqsrcs_sdma0_4_0.h50
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/sdma0/irqsrcs_sdma0_5_0.h43
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/sdma1/irqsrcs_sdma1_4_0.h50
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/sdma1/irqsrcs_sdma1_5_0.h44
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/sdma2/irqsrcs_sdma2_5_0.h45
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/sdma3/irqsrcs_sdma3_5_0.h45
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/smuio/irqsrcs_smuio_9_0.h32
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/thm/irqsrcs_thm_9_0.h33
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/uvd/irqsrcs_uvd_7_0.h34
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vce/irqsrcs_vce_4_0.h34
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_1_0.h34
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_2_0.h36
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h44
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h47
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vmc/irqsrcs_vmc_1_0.h37
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vpe/irqsrcs_vpe_6_1.h40
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h338
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h1829
-rw-r--r--drivers/gpu/drm/amd/include/mes_v11_api_def.h709
-rw-r--r--drivers/gpu/drm/amd/include/mes_v12_api_def.h907
-rw-r--r--drivers/gpu/drm/amd/include/navi10_enum.h22764
-rw-r--r--drivers/gpu/drm/amd/include/navi10_ip_offset.h855
-rw-r--r--drivers/gpu/drm/amd/include/navi12_ip_offset.h1117
-rw-r--r--drivers/gpu/drm/amd/include/navi14_ip_offset.h1117
-rw-r--r--drivers/gpu/drm/amd/include/pptable.h760
-rw-r--r--drivers/gpu/drm/amd/include/renoir_ip_offset.h1396
-rw-r--r--drivers/gpu/drm/amd/include/sienna_cichlid_ip_offset.h1166
-rw-r--r--drivers/gpu/drm/amd/include/soc15_hw_ip.h104
-rw-r--r--drivers/gpu/drm/amd/include/soc15_ih_clientid.h113
-rw-r--r--drivers/gpu/drm/amd/include/soc21_enum.h22477
-rw-r--r--drivers/gpu/drm/amd/include/soc24_enum.h21073
-rw-r--r--drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h446
-rw-r--r--drivers/gpu/drm/amd/include/v10_structs.h1257
-rw-r--r--drivers/gpu/drm/amd/include/v11_structs.h1189
-rw-r--r--drivers/gpu/drm/amd/include/v12_structs.h1189
-rw-r--r--drivers/gpu/drm/amd/include/v9_structs.h768
-rw-r--r--drivers/gpu/drm/amd/include/vangogh_ip_offset.h1514
-rw-r--r--drivers/gpu/drm/amd/include/vega10_enum.h22532
-rw-r--r--drivers/gpu/drm/amd/include/vega10_ip_offset.h1263
-rw-r--r--drivers/gpu/drm/amd/include/vega20_ip_offset.h1049
-rw-r--r--drivers/gpu/drm/amd/include/vi_structs.h533
-rw-r--r--drivers/gpu/drm/amd/include/yellow_carp_offset.h1365
-rw-r--r--drivers/gpu/drm/amd/pm/Makefile51
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm.c2137
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm_internal.c95
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c5055
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h617
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm_internal.h28
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h128
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/Makefile32
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/cik_dpm.h29
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c3361
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.h229
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_smc.c218
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c1011
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.h37
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/ppsmc.h200
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/r600_dpm.h126
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c8151
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.h465
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_smc.c301
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/sislands_smc.h410
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/Makefile35
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c1633
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/Makefile44
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ci_baco.c189
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ci_baco.h29
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/common_baco.c120
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/common_baco.h63
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/fiji_baco.c188
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/fiji_baco.h29
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c510
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/hwmgr.c566
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/hwmgr_ppt.h116
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/polaris_baco.c212
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/polaris_baco.h29
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_overdriver.c1288
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_overdriver.h46
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c308
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.h40
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c1262
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.h350
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.c632
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.h240
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pppcielanes.c64
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pppcielanes.h31
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/pptable_v1_0.h501
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.c1417
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/process_pptables_v1_0.h35
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.c1810
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/processpptables.h50
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c1692
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h325
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_inc.h43
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.c90
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_baco.h32
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.c437
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_clockpowergating.h37
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_dyn_defaults.h55
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c5832
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.h388
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_powertune.c1266
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_powertune.h62
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.c474
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_thermal.h58
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c2079
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.h311
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.c65
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu9_baco.h31
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c769
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.h241
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/tonga_baco.c221
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/tonga_baco.h29
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_baco.c117
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_baco.h29
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c5839
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.h447
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_inc.h43
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c1366
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.h82
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_pptable.h439
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_processpptables.c1363
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_processpptables.h63
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c657
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.h80
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_baco.c115
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_baco.h29
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c2994
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.h457
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_inc.h40
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_pptable.h108
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_processpptables.c397
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_processpptables.h58
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_thermal.c310
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_thermal.h66
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.c120
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_baco.h33
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c4456
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.h588
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_inc.h36
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_powertune.c72
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_powertune.h32
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_pptable.h139
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_processpptables.c395
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_processpptables.h31
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.c356
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_thermal.h71
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/amd_powerplay.h35
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/cz_ppsmc.h186
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/fiji_ppsmc.h412
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/hardwaremanager.h465
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h835
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/polaris10_pwrvirus.h1793
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h197
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_debug.h62
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_endian.h38
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/pp_thermal.h42
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/ppinterrupt.h46
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/rv_ppsmc.h96
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu10.h188
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu10_driver_if.h117
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu11_driver_if.h895
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7.h187
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu71.h504
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu71_discrete.h631
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu72.h687
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu72_discrete.h783
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu73.h709
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu73_discrete.h784
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu74.h833
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu74_discrete.h850
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu75.h760
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu75_discrete.h886
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_common.h54
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_discrete.h515
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_fusion.h290
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu7_ppsmc.h427
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu8.h72
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu8_fusion.h135
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu9.h148
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu9_driver_if.h488
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu_ucode_xfer_cz.h169
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smu_ucode_xfer_vi.h101
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/smumgr.h118
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/tonga_ppsmc.h420
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega10_ppsmc.h144
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega12/smu9_driver_if.h769
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega12_ppsmc.h123
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/inc/vega20_ppsmc.h131
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/Makefile33
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.c2990
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/ci_smumgr.h76
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c2670
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.h49
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c2686
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.h70
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.c2708
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/polaris10_smumgr.h67
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu10_smumgr.c322
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu10_smumgr.h50
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c568
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.h81
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu8_smumgr.c910
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu8_smumgr.h99
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu9_smumgr.c174
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smu9_smumgr.h32
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/smumgr.c255
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.c3267
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/tonga_smumgr.h74
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c399
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.h51
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c413
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.h57
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega20_smumgr.c644
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vega20_smumgr.h63
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vegam_smumgr.c2300
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/smumgr/vegam_smumgr.h75
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/Makefile36
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c4177
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h1812
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/aldebaran_ppsmc.h131
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/arcturus_ppsmc.h134
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_arcturus.h933
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_cyan_skillfish.h79
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_navi10.h1222
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h1835
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_vangogh.h284
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu12_driver_if.h232
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_aldebaran.h557
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h1632
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h282
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_5.h139
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_6.h243
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h1622
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_yellow_carp.h222
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h1889
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h263
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h143
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_ppsmc.h141
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_pmfw.h123
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h122
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_8_pmfw.h152
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_8_ppsmc.h77
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v12_0_ppsmc.h106
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h150
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h378
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h146
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_1_pmfw.h141
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_1_ppsmc.h97
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_pmfw.h137
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_ppsmc.h137
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_5_pmfw.h126
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_5_ppsmc.h74
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h471
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h123
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_7_ppsmc.h140
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h194
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h141
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h150
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_11_0_cdr_table.h192
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h488
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h307
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_7_pptable.h199
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0_pptable.h167
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v12_0.h66
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h302
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_0_pptable.h198
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_7_pptable.h198
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0_pptable.h169
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h249
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h204
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/Makefile35
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c2027
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.h72
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c606
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.h29
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c3618
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.h54
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c3230
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h46
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c2187
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c2559
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h59
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/Makefile31
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c1506
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.h34
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c408
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile31
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c2158
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.h72
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c2510
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c3302
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c1073
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c1148
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c1140
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.h29
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c4086
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h265
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c2888
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c1367
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/Makefile30
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c1977
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c1723
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c2932
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.h28
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c1201
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h273
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_internal.h106
-rw-r--r--drivers/gpu/drm/amd/ras/Makefile34
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/Makefile33
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c285
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.h54
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.c182
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_eeprom_i2c.h27
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.c648
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mgr.h83
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.c94
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_mp1_v13_0.h30
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.c125
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_nbio_v7_9.h30
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.c190
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_process.h41
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_sys.c279
-rw-r--r--drivers/gpu/drm/amd/ras/ras_mgr/ras_sys.h110
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/Makefile44
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras.h370
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca.c672
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca.h164
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.c379
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_aca_v1_0.h71
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cmd.c522
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cmd.h426
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core.c603
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core_status.h37
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cper.c315
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_cper.h304
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_eeprom.c1339
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_eeprom.h197
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx.c70
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx.h43
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.c426
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_gfx_v9_0.h259
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c317
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h93
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1.c81
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1.h50
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.c105
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_mp1_v13_0.h30
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio.c96
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio.h46
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.c123
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_nbio_v7_9.h31
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_process.c322
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_process.h53
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp.c750
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp.h145
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.c46
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_psp_v13_0.h31
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_ta_if.h231
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc.c707
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc.h166
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c511
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.h314
-rw-r--r--drivers/gpu/drm/arm/Kconfig45
-rw-r--r--drivers/gpu/drm/arm/Makefile7
-rw-r--r--drivers/gpu/drm/arm/display/Kbuild3
-rw-r--r--drivers/gpu/drm/arm/display/Kconfig14
-rw-r--r--drivers/gpu/drm/arm/display/include/malidp_io.h49
-rw-r--r--drivers/gpu/drm/arm/display/include/malidp_product.h36
-rw-r--r--drivers/gpu/drm/arm/display/include/malidp_utils.h43
-rw-r--r--drivers/gpu/drm/arm/display/komeda/Makefile26
-rw-r--r--drivers/gpu/drm/arm/display/komeda/d71/d71_component.c1444
-rw-r--r--drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c643
-rw-r--r--drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h54
-rw-r--r--drivers/gpu/drm/arm/display/komeda/d71/d71_regs.h541
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c128
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.h25
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_crtc.c689
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_dev.c338
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_dev.h243
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_drv.c169
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_event.c158
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c152
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h101
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c285
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h49
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.c354
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.h196
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c380
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h565
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c1356
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_plane.c335
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c432
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c202
-rw-r--r--drivers/gpu/drm/arm/hdlcd_crtc.c344
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.c421
-rw-r--r--drivers/gpu/drm/arm/hdlcd_drv.h41
-rw-r--r--drivers/gpu/drm/arm/hdlcd_regs.h87
-rw-r--r--drivers/gpu/drm/arm/malidp_crtc.c563
-rw-r--r--drivers/gpu/drm/arm/malidp_drv.c1005
-rw-r--r--drivers/gpu/drm/arm/malidp_drv.h107
-rw-r--r--drivers/gpu/drm/arm/malidp_hw.c1392
-rw-r--r--drivers/gpu/drm/arm/malidp_hw.h412
-rw-r--r--drivers/gpu/drm/arm/malidp_mw.c275
-rw-r--r--drivers/gpu/drm/arm/malidp_mw.h14
-rw-r--r--drivers/gpu/drm/arm/malidp_planes.c1033
-rw-r--r--drivers/gpu/drm/arm/malidp_regs.h301
-rw-r--r--drivers/gpu/drm/armada/Kconfig15
-rw-r--r--drivers/gpu/drm/armada/Makefile8
-rw-r--r--drivers/gpu/drm/armada/armada_510.c159
-rw-r--r--drivers/gpu/drm/armada/armada_crtc.c1095
-rw-r--r--drivers/gpu/drm/armada/armada_crtc.h96
-rw-r--r--drivers/gpu/drm/armada/armada_debugfs.c113
-rw-r--r--drivers/gpu/drm/armada/armada_drm.h93
-rw-r--r--drivers/gpu/drm/armada/armada_drv.c287
-rw-r--r--drivers/gpu/drm/armada/armada_fb.c143
-rw-r--r--drivers/gpu/drm/armada/armada_fb.h25
-rw-r--r--drivers/gpu/drm/armada/armada_fbdev.c109
-rw-r--r--drivers/gpu/drm/armada/armada_gem.c562
-rw-r--r--drivers/gpu/drm/armada/armada_gem.h45
-rw-r--r--drivers/gpu/drm/armada/armada_hw.h330
-rw-r--r--drivers/gpu/drm/armada/armada_ioctlP.h15
-rw-r--r--drivers/gpu/drm/armada/armada_overlay.c598
-rw-r--r--drivers/gpu/drm/armada/armada_plane.c308
-rw-r--r--drivers/gpu/drm/armada/armada_plane.h36
-rw-r--r--drivers/gpu/drm/armada/armada_trace.c5
-rw-r--r--drivers/gpu/drm/armada/armada_trace.h90
-rw-r--r--drivers/gpu/drm/aspeed/Kconfig17
-rw-r--r--drivers/gpu/drm/aspeed/Makefile4
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx.h108
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_crtc.c241
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_drv.c380
-rw-r--r--drivers/gpu/drm/aspeed/aspeed_gfx_out.c42
-rw-r--r--drivers/gpu/drm/ast/Kconfig16
-rw-r--r--drivers/gpu/drm/ast/Makefile26
-rw-r--r--drivers/gpu/drm/ast/ast_2000.c257
-rw-r--r--drivers/gpu/drm/ast/ast_2100.c480
-rw-r--r--drivers/gpu/drm/ast/ast_2200.c92
-rw-r--r--drivers/gpu/drm/ast/ast_2300.c1463
-rw-r--r--drivers/gpu/drm/ast/ast_2400.c100
-rw-r--r--drivers/gpu/drm/ast/ast_2500.c675
-rw-r--r--drivers/gpu/drm/ast/ast_2600.c116
-rw-r--r--drivers/gpu/drm/ast/ast_cursor.c314
-rw-r--r--drivers/gpu/drm/ast/ast_ddc.c187
-rw-r--r--drivers/gpu/drm/ast/ast_ddc.h11
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c577
-rw-r--r--drivers/gpu/drm/ast/ast_dp501.c609
-rw-r--r--drivers/gpu/drm/ast/ast_drv.c546
-rw-r--r--drivers/gpu/drm/ast/ast_drv.h550
-rw-r--r--drivers/gpu/drm/ast/ast_mm.c97
-rw-r--r--drivers/gpu/drm/ast/ast_mode.c1058
-rw-r--r--drivers/gpu/drm/ast/ast_post.c129
-rw-r--r--drivers/gpu/drm/ast/ast_post.h50
-rw-r--r--drivers/gpu/drm/ast/ast_reg.h78
-rw-r--r--drivers/gpu/drm/ast/ast_sil164.c126
-rw-r--r--drivers/gpu/drm/ast/ast_tables.h109
-rw-r--r--drivers/gpu/drm/ast/ast_vbios.c241
-rw-r--r--drivers/gpu/drm/ast/ast_vbios.h108
-rw-r--r--drivers/gpu/drm/ast/ast_vga.c126
-rw-r--r--drivers/gpu/drm/ati_pcigart.c201
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/Kconfig11
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/Makefile7
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c681
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c954
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h501
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c151
-rw-r--r--drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c1301
-rw-r--r--drivers/gpu/drm/bridge/Kconfig476
-rw-r--r--drivers/gpu/drm/bridge/Makefile51
-rw-r--r--drivers/gpu/drm/bridge/adv7511/Kconfig28
-rw-r--r--drivers/gpu/drm/bridge/adv7511/Makefile5
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511.h443
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_audio.c206
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_cec.c383
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7511_drv.c1507
-rw-r--r--drivers/gpu/drm/bridge/adv7511/adv7533.c185
-rw-r--r--drivers/gpu/drm/bridge/analogix/Kconfig45
-rw-r--r--drivers/gpu/drm/bridge/analogix/Makefile6
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-anx6345.c793
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-anx78xx.c1359
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-anx78xx.h249
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.c167
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.h256
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix-i2c-txcommon.h234
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_core.c1692
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_core.h234
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c1107
-rw-r--r--drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h426
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.c2824
-rw-r--r--drivers/gpu/drm/bridge/analogix/anx7625.h484
-rw-r--r--drivers/gpu/drm/bridge/aux-bridge.c150
-rw-r--r--drivers/gpu/drm/bridge/aux-hpd-bridge.c211
-rw-r--r--drivers/gpu/drm/bridge/cadence/Kconfig50
-rw-r--r--drivers/gpu/drm/bridge/cadence/Makefile7
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-core.c1410
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-core.h82
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-j721e.c51
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-dsi-j721e.h16
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c2599
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.h422
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c543
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h89
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-j721e.c77
-rw-r--r--drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-j721e.h19
-rw-r--r--drivers/gpu/drm/bridge/chipone-icn6211.c824
-rw-r--r--drivers/gpu/drm/bridge/chrontel-ch7033.c620
-rw-r--r--drivers/gpu/drm/bridge/cros-ec-anx7688.c189
-rw-r--r--drivers/gpu/drm/bridge/display-connector.c447
-rw-r--r--drivers/gpu/drm/bridge/fsl-ldb.c405
-rw-r--r--drivers/gpu/drm/bridge/imx/Kconfig102
-rw-r--r--drivers/gpu/drm/bridge/imx/Makefile11
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-ldb-helper.c230
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-ldb-helper.h95
-rw-r--r--drivers/gpu/drm/bridge/imx/imx-legacy-bridge.c91
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pai.c158
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-pvi.c207
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8mp-hdmi-tx.c213
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qm-ldb.c591
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-ldb.c721
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pixel-combiner.c443
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pixel-link.c422
-rw-r--r--drivers/gpu/drm/bridge/imx/imx8qxp-pxl2dpi.c480
-rw-r--r--drivers/gpu/drm/bridge/imx/imx93-mipi-dsi.c915
-rw-r--r--drivers/gpu/drm/bridge/ite-it6263.c930
-rw-r--r--drivers/gpu/drm/bridge/ite-it6505.c3683
-rw-r--r--drivers/gpu/drm/bridge/ite-it66121.c1653
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt8912b.c838
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9211.c799
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9611.c1215
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9611uxc.c949
-rw-r--r--drivers/gpu/drm/bridge/lvds-codec.c249
-rw-r--r--drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c407
-rw-r--r--drivers/gpu/drm/bridge/microchip-lvds.c229
-rw-r--r--drivers/gpu/drm/bridge/nwl-dsi.c1226
-rw-r--r--drivers/gpu/drm/bridge/nwl-dsi.h144
-rw-r--r--drivers/gpu/drm/bridge/nxp-ptn3460.c345
-rw-r--r--drivers/gpu/drm/bridge/panel.c551
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8622.c551
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8640.c753
-rw-r--r--drivers/gpu/drm/bridge/samsung-dsim.c2321
-rw-r--r--drivers/gpu/drm/bridge/sii902x.c1259
-rw-r--r--drivers/gpu/drm/bridge/sii9234.c965
-rw-r--r--drivers/gpu/drm/bridge/sil-sii8620.c2389
-rw-r--r--drivers/gpu/drm/bridge/sil-sii8620.h1532
-rw-r--r--drivers/gpu/drm/bridge/simple-bridge.c317
-rw-r--r--drivers/gpu/drm/bridge/ssd2825.c775
-rw-r--r--drivers/gpu/drm/bridge/synopsys/Kconfig82
-rw-r--r--drivers/gpu/drm/bridge/synopsys/Makefile12
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-dp.c2097
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c660
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h23
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c360
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h19
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-gp-audio.c200
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c239
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c1343
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h848
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c3669
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.h1168
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c1338
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c1032
-rw-r--r--drivers/gpu/drm/bridge/tc358762.c333
-rw-r--r--drivers/gpu/drm/bridge/tc358764.c413
-rw-r--r--drivers/gpu/drm/bridge/tc358767.c2638
-rw-r--r--drivers/gpu/drm/bridge/tc358768.c1353
-rw-r--r--drivers/gpu/drm/bridge/tc358775.c756
-rw-r--r--drivers/gpu/drm/bridge/tda998x_drv.c2076
-rw-r--r--drivers/gpu/drm/bridge/thc63lvd1024.c244
-rw-r--r--drivers/gpu/drm/bridge/ti-dlpc3433.c417
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi83.c1038
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi86.c2191
-rw-r--r--drivers/gpu/drm/bridge/ti-tdp158.c115
-rw-r--r--drivers/gpu/drm/bridge/ti-tfp410.c501
-rw-r--r--drivers/gpu/drm/bridge/ti-tpd12s015.c216
-rw-r--r--drivers/gpu/drm/bridge/waveshare-dsi.c203
-rw-r--r--drivers/gpu/drm/ci/arm.config70
-rw-r--r--drivers/gpu/drm/ci/arm64.config210
-rw-r--r--drivers/gpu/drm/ci/build-igt.sh74
-rw-r--r--drivers/gpu/drm/ci/build.sh161
-rw-r--r--drivers/gpu/drm/ci/build.yml226
-rw-r--r--drivers/gpu/drm/ci/check-devicetrees.yml50
-rwxr-xr-xdrivers/gpu/drm/ci/check-patch.py57
-rw-r--r--drivers/gpu/drm/ci/container.yml77
-rwxr-xr-xdrivers/gpu/drm/ci/dt-binding-check.sh19
-rwxr-xr-xdrivers/gpu/drm/ci/dtbs-check.sh22
-rw-r--r--drivers/gpu/drm/ci/gitlab-ci.yml349
-rwxr-xr-xdrivers/gpu/drm/ci/igt_runner.sh101
-rw-r--r--drivers/gpu/drm/ci/image-tags.yml18
-rwxr-xr-xdrivers/gpu/drm/ci/kunit.sh16
-rw-r--r--drivers/gpu/drm/ci/kunit.yml37
-rwxr-xr-xdrivers/gpu/drm/ci/lava-submit.sh105
-rwxr-xr-xdrivers/gpu/drm/ci/setup-llvm-links.sh13
-rw-r--r--drivers/gpu/drm/ci/static-checks.yml12
-rw-r--r--drivers/gpu/drm/ci/test.yml471
-rw-r--r--drivers/gpu/drm/ci/x86_64.config112
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-fails.txt40
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/amdgpu-stoney-skips.txt40
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-fails.txt48
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-flakes.txt69
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-amly-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-fails.txt44
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-flakes.txt13
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-apl-skips.txt31
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-fails.txt74
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-cml-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-fails.txt47
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-flakes.txt20
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-glk-skips.txt328
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-fails.txt46
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-jsl-skips.txt21
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-fails.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-kbl-skips.txt152
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-tgl-fails.txt30
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-tgl-skips.txt51
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-fails.txt52
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-flakes.txt13
-rw-r--r--drivers/gpu/drm/ci/xfails/i915-whl-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-fails.txt44
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-flakes.txt55
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8173-skips.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-fails.txt38
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-flakes.txt41
-rw-r--r--drivers/gpu/drm/ci/xfails/mediatek-mt8183-skips.txt23
-rw-r--r--drivers/gpu/drm/ci/xfails/meson-g12b-fails.txt12
-rw-r--r--drivers/gpu/drm/ci/xfails/meson-g12b-skips.txt18
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8016-fails.txt5
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8016-skips.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-apq8096-skips.txt28
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-fails.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-flakes.txt41
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-kingoftown-skips.txt39
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-fails.txt17
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-flakes.txt20
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sc7180-trogdor-lazor-limozeen-skips.txt30
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-fails.txt10
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/msm-sm8350-hdk-skips.txt212
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-g12b-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-g12b-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-mt8183-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-mt8183-skips.txt24
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3288-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3288-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-fails.txt2
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-flakes.txt6
-rw-r--r--drivers/gpu/drm/ci/xfails/panfrost-rk3399-skips.txt27
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-fails.txt9
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-flakes.txt34
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3288-skips.txt21
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-fails.txt74
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-flakes.txt146
-rw-r--r--drivers/gpu/drm/ci/xfails/rockchip-rk3399-skips.txt26
-rw-r--r--drivers/gpu/drm/ci/xfails/virtio_gpu-none-fails.txt190
-rw-r--r--drivers/gpu/drm/ci/xfails/virtio_gpu-none-skips.txt31
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-fails.txt25
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-flakes.txt118
-rw-r--r--drivers/gpu/drm/ci/xfails/vkms-none-skips.txt811
-rw-r--r--drivers/gpu/drm/clients/Kconfig123
-rw-r--r--drivers/gpu/drm/clients/Makefile8
-rw-r--r--drivers/gpu/drm/clients/drm_client_internal.h25
-rw-r--r--drivers/gpu/drm/clients/drm_client_setup.c98
-rw-r--r--drivers/gpu/drm/clients/drm_fbdev_client.c176
-rw-r--r--drivers/gpu/drm/clients/drm_log.c441
-rw-r--r--drivers/gpu/drm/display/Kconfig108
-rw-r--r--drivers/gpu/drm/display/Makefile31
-rw-r--r--drivers/gpu/drm/display/drm_bridge_connector.c887
-rw-r--r--drivers/gpu/drm/display/drm_display_helper_mod.c22
-rw-r--r--drivers/gpu/drm/display/drm_dp_aux_bus.c392
-rw-r--r--drivers/gpu/drm/display/drm_dp_aux_dev.c354
-rw-r--r--drivers/gpu/drm/display/drm_dp_cec.c460
-rw-r--r--drivers/gpu/drm/display/drm_dp_dual_mode_helper.c537
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper.c4840
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper_internal.h33
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology.c6192
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology_internal.h26
-rw-r--r--drivers/gpu/drm/display/drm_dp_tunnel.c1951
-rw-r--r--drivers/gpu/drm/display/drm_dsc_helper.c1566
-rw-r--r--drivers/gpu/drm/display/drm_hdcp_helper.c421
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_audio_helper.c195
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_cec_helper.c193
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_cec_notifier_helper.c65
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_helper.c427
-rw-r--r--drivers/gpu/drm/display/drm_hdmi_state_helper.c1180
-rw-r--r--drivers/gpu/drm/display/drm_scdc_helper.c278
-rw-r--r--drivers/gpu/drm/drm_agpsupport.c469
-rw-r--r--drivers/gpu/drm/drm_atomic.c2097
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c4016
-rw-r--r--drivers/gpu/drm/drm_atomic_state_helper.c831
-rw-r--r--drivers/gpu/drm/drm_atomic_uapi.c1721
-rw-r--r--drivers/gpu/drm/drm_auth.c488
-rw-r--r--drivers/gpu/drm/drm_blend.c623
-rw-r--r--drivers/gpu/drm/drm_bridge.c1544
-rw-r--r--drivers/gpu/drm/drm_bridge_helper.c60
-rw-r--r--drivers/gpu/drm/drm_buddy.c1326
-rw-r--r--drivers/gpu/drm/drm_buffer.c184
-rw-r--r--drivers/gpu/drm/drm_bufs.c1628
-rw-r--r--drivers/gpu/drm/drm_cache.c294
-rw-r--r--drivers/gpu/drm/drm_client.c464
-rw-r--r--drivers/gpu/drm/drm_client_event.c214
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c1342
-rw-r--r--drivers/gpu/drm/drm_client_sysrq.c65
-rw-r--r--drivers/gpu/drm/drm_color_mgmt.c919
-rw-r--r--drivers/gpu/drm/drm_colorop.c599
-rw-r--r--drivers/gpu/drm/drm_connector.c3622
-rw-r--r--drivers/gpu/drm/drm_context.c461
-rw-r--r--drivers/gpu/drm/drm_crtc.c3090
-rw-r--r--drivers/gpu/drm/drm_crtc_helper.c1090
-rw-r--r--drivers/gpu/drm/drm_crtc_helper_internal.h53
-rw-r--r--drivers/gpu/drm/drm_crtc_internal.h334
-rw-r--r--drivers/gpu/drm/drm_damage_helper.c336
-rw-r--r--drivers/gpu/drm/drm_debugfs.c884
-rw-r--r--drivers/gpu/drm/drm_debugfs_crc.c439
-rw-r--r--drivers/gpu/drm/drm_displayid.c204
-rw-r--r--drivers/gpu/drm/drm_displayid_internal.h185
-rw-r--r--drivers/gpu/drm/drm_dma.c161
-rw-r--r--drivers/gpu/drm/drm_dp_i2c_helper.c208
-rw-r--r--drivers/gpu/drm/drm_draw.c157
-rw-r--r--drivers/gpu/drm/drm_draw_internal.h56
-rw-r--r--drivers/gpu/drm/drm_drv.c1551
-rw-r--r--drivers/gpu/drm/drm_dumb_buffers.c300
-rw-r--r--drivers/gpu/drm/drm_edid.c7231
-rw-r--r--drivers/gpu/drm/drm_edid_load.c107
-rw-r--r--drivers/gpu/drm/drm_edid_modes.h380
-rw-r--r--drivers/gpu/drm/drm_eld.c57
-rw-r--r--drivers/gpu/drm/drm_encoder.c364
-rw-r--r--drivers/gpu/drm/drm_encoder_slave.c123
-rw-r--r--drivers/gpu/drm/drm_exec.c341
-rw-r--r--drivers/gpu/drm/drm_fb_dma_helper.c197
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c2644
-rw-r--r--drivers/gpu/drm/drm_fbdev_dma.c323
-rw-r--r--drivers/gpu/drm/drm_fbdev_shmem.c203
-rw-r--r--drivers/gpu/drm/drm_fbdev_ttm.c234
-rw-r--r--drivers/gpu/drm/drm_file.c1081
-rw-r--r--drivers/gpu/drm/drm_flip_work.c158
-rw-r--r--drivers/gpu/drm/drm_fops.c653
-rw-r--r--drivers/gpu/drm/drm_format_helper.c1381
-rw-r--r--drivers/gpu/drm/drm_format_internal.h174
-rw-r--r--drivers/gpu/drm/drm_fourcc.c523
-rw-r--r--drivers/gpu/drm/drm_framebuffer.c1238
-rw-r--r--drivers/gpu/drm/drm_gem.c1605
-rw-r--r--drivers/gpu/drm/drm_gem_atomic_helper.c478
-rw-r--r--drivers/gpu/drm/drm_gem_dma_helper.c608
-rw-r--r--drivers/gpu/drm/drm_gem_framebuffer_helper.c622
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c898
-rw-r--r--drivers/gpu/drm/drm_gem_ttm_helper.c155
-rw-r--r--drivers/gpu/drm/drm_gem_vram_helper.c1016
-rw-r--r--drivers/gpu/drm/drm_global.c112
-rw-r--r--drivers/gpu/drm/drm_gpusvm.c1635
-rw-r--r--drivers/gpu/drm/drm_gpuvm.c3207
-rw-r--r--drivers/gpu/drm/drm_hashtab.c196
-rw-r--r--drivers/gpu/drm/drm_info.c315
-rw-r--r--drivers/gpu/drm/drm_internal.h294
-rw-r--r--drivers/gpu/drm/drm_ioc32.c1019
-rw-r--r--drivers/gpu/drm/drm_ioctl.c919
-rw-r--r--drivers/gpu/drm/drm_irq.c1337
-rw-r--r--drivers/gpu/drm/drm_kms_helper_common.c32
-rw-r--r--drivers/gpu/drm/drm_lease.c733
-rw-r--r--drivers/gpu/drm/drm_lock.c374
-rw-r--r--drivers/gpu/drm/drm_managed.c321
-rw-r--r--drivers/gpu/drm/drm_memory.c162
-rw-r--r--drivers/gpu/drm/drm_mipi_dbi.c1521
-rw-r--r--drivers/gpu/drm/drm_mipi_dsi.c2053
-rw-r--r--drivers/gpu/drm/drm_mm.c1277
-rw-r--r--drivers/gpu/drm/drm_mode_config.c728
-rw-r--r--drivers/gpu/drm/drm_mode_object.c611
-rw-r--r--drivers/gpu/drm/drm_modes.c2422
-rw-r--r--drivers/gpu/drm/drm_modeset_helper.c265
-rw-r--r--drivers/gpu/drm/drm_modeset_lock.c483
-rw-r--r--drivers/gpu/drm/drm_of.c611
-rw-r--r--drivers/gpu/drm/drm_pagemap.c882
-rw-r--r--drivers/gpu/drm/drm_panel.c735
-rw-r--r--drivers/gpu/drm/drm_panel_backlight_quirks.c156
-rw-r--r--drivers/gpu/drm/drm_panel_orientation_quirks.c602
-rw-r--r--drivers/gpu/drm/drm_panic.c1037
-rw-r--r--drivers/gpu/drm/drm_panic_qr.rs1016
-rw-r--r--drivers/gpu/drm/drm_pci.c437
-rw-r--r--drivers/gpu/drm/drm_plane.c1881
-rw-r--r--drivers/gpu/drm/drm_plane_helper.c282
-rw-r--r--drivers/gpu/drm/drm_platform.c196
-rw-r--r--drivers/gpu/drm/drm_prime.c1118
-rw-r--r--drivers/gpu/drm/drm_print.c416
-rw-r--r--drivers/gpu/drm/drm_privacy_screen.c472
-rw-r--r--drivers/gpu/drm/drm_privacy_screen_x86.c106
-rw-r--r--drivers/gpu/drm/drm_probe_helper.c1332
-rw-r--r--drivers/gpu/drm/drm_proc.c221
-rw-r--r--drivers/gpu/drm/drm_property.c999
-rw-r--r--drivers/gpu/drm/drm_rect.c374
-rw-r--r--drivers/gpu/drm/drm_scatter.c217
-rw-r--r--drivers/gpu/drm/drm_self_refresh_helper.c282
-rw-r--r--drivers/gpu/drm/drm_simple_kms_helper.c458
-rw-r--r--drivers/gpu/drm/drm_sman.c350
-rw-r--r--drivers/gpu/drm/drm_stub.c473
-rw-r--r--drivers/gpu/drm/drm_suballoc.c459
-rw-r--r--drivers/gpu/drm/drm_syncobj.c1744
-rw-r--r--drivers/gpu/drm/drm_sysfs.c791
-rw-r--r--drivers/gpu/drm/drm_trace.h40
-rw-r--r--drivers/gpu/drm/drm_trace_points.c3
-rw-r--r--drivers/gpu/drm/drm_usb.c117
-rw-r--r--drivers/gpu/drm/drm_vblank.c2330
-rw-r--r--drivers/gpu/drm/drm_vblank_helper.c176
-rw-r--r--drivers/gpu/drm/drm_vblank_work.c292
-rw-r--r--drivers/gpu/drm/drm_vm.c690
-rw-r--r--drivers/gpu/drm/drm_vma_manager.c422
-rw-r--r--drivers/gpu/drm/drm_writeback.c594
-rw-r--r--drivers/gpu/drm/etnaviv/Kconfig24
-rw-r--r--drivers/gpu/drm/etnaviv/Makefile19
-rw-r--r--drivers/gpu/drm/etnaviv/cmdstream.xml.h314
-rw-r--r--drivers/gpu/drm/etnaviv/common.xml.h487
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_buffer.c541
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c207
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c144
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h46
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.c774
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.h129
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.c233
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.h42
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.c758
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.h127
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c147
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c636
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c2033
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.h231
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_hwdb.c327
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_iommu.c176
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c314
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_mmu.c560
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_mmu.h126
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_perfmon.c587
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_perfmon.h38
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_sched.c159
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_sched.h23
-rw-r--r--drivers/gpu/drm/etnaviv/state.xml.h575
-rw-r--r--drivers/gpu/drm/etnaviv/state_3d.xml.h15
-rw-r--r--drivers/gpu/drm/etnaviv/state_blt.xml.h54
-rw-r--r--drivers/gpu/drm/etnaviv/state_hi.xml.h633
-rw-r--r--drivers/gpu/drm/exynos/Kconfig128
-rw-r--r--drivers/gpu/drm/exynos/Makefile27
-rw-r--r--drivers/gpu/drm/exynos/exynos5433_drm_decon.c881
-rw-r--r--drivers/gpu/drm/exynos/exynos7_drm_decon.c852
-rw-r--r--drivers/gpu/drm/exynos/exynos_dp.c292
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_crtc.c240
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_crtc.h39
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dma.c143
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dpi.c249
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.c483
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_drv.h271
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_dsi.c204
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fb.c182
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fb.h26
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fbdev.c134
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fbdev.h27
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimc.c1417
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_fimd.c1345
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.c1617
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_g2d.h44
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.c460
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gem.h108
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gsc.c1429
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_ipp.c943
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_ipp.h170
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_mic.c468
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_plane.c336
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_plane.h9
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_rotator.c460
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_scaler.c728
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_vidi.c473
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_vidi.h18
-rw-r--r--drivers/gpu/drm/exynos/exynos_hdmi.c2137
-rw-r--r--drivers/gpu/drm/exynos/exynos_mixer.c1340
-rw-r--r--drivers/gpu/drm/exynos/regs-decon5433.h228
-rw-r--r--drivers/gpu/drm/exynos/regs-decon7.h348
-rw-r--r--drivers/gpu/drm/exynos/regs-fimc.h665
-rw-r--r--drivers/gpu/drm/exynos/regs-gsc.h282
-rw-r--r--drivers/gpu/drm/exynos/regs-hdmi.h608
-rw-r--r--drivers/gpu/drm/exynos/regs-mixer.h161
-rw-r--r--drivers/gpu/drm/exynos/regs-rotator.h70
-rw-r--r--drivers/gpu/drm/exynos/regs-scaler.h423
-rw-r--r--drivers/gpu/drm/exynos/regs-vp.h88
-rw-r--r--drivers/gpu/drm/fsl-dcu/Kconfig15
-rw-r--r--drivers/gpu/drm/fsl-dcu/Makefile8
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c192
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.h15
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c390
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h201
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c53
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h28
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c231
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h14
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c140
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_tcon.c107
-rw-r--r--drivers/gpu/drm/fsl-dcu/fsl_tcon.h29
-rw-r--r--drivers/gpu/drm/gma500/Kconfig19
-rw-r--r--drivers/gpu/drm/gma500/Makefile42
-rw-r--r--drivers/gpu/drm/gma500/backlight.c119
-rw-r--r--drivers/gpu/drm/gma500/cdv_device.c599
-rw-r--r--drivers/gpu/drm/gma500/cdv_device.h21
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_crt.c303
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_display.c975
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_dp.c2094
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_hdmi.c367
-rw-r--r--drivers/gpu/drm/gma500/cdv_intel_lvds.c668
-rw-r--r--drivers/gpu/drm/gma500/fbdev.c196
-rw-r--r--drivers/gpu/drm/gma500/framebuffer.c222
-rw-r--r--drivers/gpu/drm/gma500/framebuffer.h17
-rw-r--r--drivers/gpu/drm/gma500/gem.c432
-rw-r--r--drivers/gpu/drm/gma500/gem.h51
-rw-r--r--drivers/gpu/drm/gma500/gma_device.c51
-rw-r--r--drivers/gpu/drm/gma500/gma_device.h14
-rw-r--r--drivers/gpu/drm/gma500/gma_display.c811
-rw-r--r--drivers/gpu/drm/gma500/gma_display.h90
-rw-r--r--drivers/gpu/drm/gma500/gtt.c312
-rw-r--r--drivers/gpu/drm/gma500/gtt.h41
-rw-r--r--drivers/gpu/drm/gma500/intel_bios.c589
-rw-r--r--drivers/gpu/drm/gma500/intel_bios.h607
-rw-r--r--drivers/gpu/drm/gma500/intel_gmbus.c502
-rw-r--r--drivers/gpu/drm/gma500/intel_i2c.c158
-rw-r--r--drivers/gpu/drm/gma500/mid_bios.c338
-rw-r--r--drivers/gpu/drm/gma500/mid_bios.h10
-rw-r--r--drivers/gpu/drm/gma500/mmu.c732
-rw-r--r--drivers/gpu/drm/gma500/mmu.h84
-rw-r--r--drivers/gpu/drm/gma500/oaktrail.h247
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_crtc.c661
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_device.c505
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi.c843
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c333
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_lvds.c433
-rw-r--r--drivers/gpu/drm/gma500/oaktrail_lvds_i2c.c169
-rw-r--r--drivers/gpu/drm/gma500/opregion.c355
-rw-r--r--drivers/gpu/drm/gma500/opregion.h54
-rw-r--r--drivers/gpu/drm/gma500/power.c232
-rw-r--r--drivers/gpu/drm/gma500/power.h54
-rw-r--r--drivers/gpu/drm/gma500/psb_device.c293
-rw-r--r--drivers/gpu/drm/gma500/psb_device.h12
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.c548
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.h734
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_display.c559
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_drv.h240
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_lvds.c806
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_modes.c34
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_reg.h1491
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_sdvo.c2527
-rw-r--r--drivers/gpu/drm/gma500/psb_intel_sdvo_regs.h723
-rw-r--r--drivers/gpu/drm/gma500/psb_irq.c504
-rw-r--r--drivers/gpu/drm/gma500/psb_irq.h29
-rw-r--r--drivers/gpu/drm/gma500/psb_reg.h556
-rw-r--r--drivers/gpu/drm/gud/Kconfig15
-rw-r--r--drivers/gpu/drm/gud/Makefile4
-rw-r--r--drivers/gpu/drm/gud/gud_connector.c725
-rw-r--r--drivers/gpu/drm/gud/gud_drv.c699
-rw-r--r--drivers/gpu/drm/gud/gud_internal.h167
-rw-r--r--drivers/gpu/drm/gud/gud_pipe.c644
-rw-r--r--drivers/gpu/drm/hisilicon/Kconfig7
-rw-r--r--drivers/gpu/drm/hisilicon/Makefile7
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/Kconfig16
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/Makefile6
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_aux.c168
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_comm.h69
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_config.h21
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.c307
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_hw.h64
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_link.c392
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_reg.h132
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/dp/dp_serdes.c71
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c538
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_debugfs.c104
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_dp.c182
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c450
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h80
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c102
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_regs.h204
-rw-r--r--drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c139
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/Kconfig12
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/Makefile6
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c903
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/dw_dsi_reg.h101
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h226
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c951
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c320
-rw-r--r--drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.h57
-rw-r--r--drivers/gpu/drm/hyperv/Makefile8
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm.h55
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_drv.c263
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_modeset.c339
-rw-r--r--drivers/gpu/drm/hyperv/hyperv_drm_proto.c528
-rw-r--r--drivers/gpu/drm/i2c/Makefile7
-rw-r--r--drivers/gpu/drm/i810/Makefile8
-rw-r--r--drivers/gpu/drm/i810/i810_dma.c1276
-rw-r--r--drivers/gpu/drm/i810/i810_drv.c100
-rw-r--r--drivers/gpu/drm/i810/i810_drv.h245
-rw-r--r--drivers/gpu/drm/i915/.gitignore2
-rw-r--r--drivers/gpu/drm/i915/Kconfig185
-rw-r--r--drivers/gpu/drm/i915/Kconfig.debug278
-rw-r--r--drivers/gpu/drm/i915/Kconfig.profile139
-rw-r--r--drivers/gpu/drm/i915/Makefile485
-rw-r--r--drivers/gpu/drm/i915/TODO.txt41
-rw-r--r--drivers/gpu/drm/i915/display/bxt_dpio_phy_regs.h273
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ch7017.c (renamed from drivers/gpu/drm/i915/dvo_ch7017.c)72
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ch7xxx.c387
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ivch.c505
-rw-r--r--drivers/gpu/drm/i915/display/dvo_ns2501.c710
-rw-r--r--drivers/gpu/drm/i915/display/dvo_sil164.c (renamed from drivers/gpu/drm/i915/dvo_sil164.c)90
-rw-r--r--drivers/gpu/drm/i915/display/dvo_tfp410.c (renamed from drivers/gpu/drm/i915/dvo_tfp410.c)105
-rw-r--r--drivers/gpu/drm/i915/display/g4x_dp.c1430
-rw-r--r--drivers/gpu/drm/i915/display/g4x_dp.h45
-rw-r--r--drivers/gpu/drm/i915/display/g4x_hdmi.c779
-rw-r--r--drivers/gpu/drm/i915/display/g4x_hdmi.h36
-rw-r--r--drivers/gpu/drm/i915/display/hsw_ips.c374
-rw-r--r--drivers/gpu/drm/i915/display/hsw_ips.h62
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_display_sr.c97
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_display_sr.h14
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane.c1273
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane.h64
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_plane_regs.h112
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm.c4190
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm.h37
-rw-r--r--drivers/gpu/drm/i915/display/i9xx_wm_regs.h257
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.c2043
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.h17
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi_regs.h343
-rw-r--r--drivers/gpu/drm/i915/display/intel_acpi.c362
-rw-r--r--drivers/gpu/drm/i915/display/intel_acpi.h31
-rw-r--r--drivers/gpu/drm/i915/display/intel_alpm.c600
-rw-r--r--drivers/gpu/drm/i915/display/intel_alpm.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic.c369
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic.h54
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio.c1422
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio.h35
-rw-r--r--drivers/gpu/drm/i915/display/intel_audio_regs.h170
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight.c1857
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight.h52
-rw-r--r--drivers/gpu/drm/i915/display/intel_backlight_regs.h121
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c3779
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.h106
-rw-r--r--drivers/gpu/drm/i915/display/intel_bo.c81
-rw-r--r--drivers/gpu/drm/i915/display/intel_bo.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.c1521
-rw-r--r--drivers/gpu/drm/i915/display/intel_bw.h42
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf.c290
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_casf_regs.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c4169
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.h80
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg.c188
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_cmtg_regs.h21
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.c4374
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.h50
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_pipeline.c99
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_pipeline.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_color_regs.h348
-rw-r--r--drivers/gpu/drm/i915/display/intel_colorop.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_colorop.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy.c417
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_combo_phy_regs.h167
-rw-r--r--drivers/gpu/drm/i915/display/intel_connector.c352
-rw-r--r--drivers/gpu/drm/i915/display/intel_connector.h38
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt.c1135
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt.h34
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt_regs.h50
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc.c845
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc.h72
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc_state_dump.c388
-rw-r--r--drivers/gpu/drm/i915/display/intel_crtc_state_dump.h18
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor.c1114
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor.h22
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor_regs.h112
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy.c3642
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy.h70
-rw-r--r--drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h436
-rw-r--r--drivers/gpu/drm/i915/display/intel_dbuf_bw.c295
-rw-r--r--drivers/gpu/drm/i915/display/intel_dbuf_bw.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c5450
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.h88
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c1854
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h81
-rw-r--r--drivers/gpu/drm/i915/display/intel_de.h240
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c8414
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.h563
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_conversion.c21
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_conversion.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_core.h627
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs.c1392
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs_params.c177
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_debugfs_params.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.c1985
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.h326
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_driver.c797
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_driver.h42
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_irq.c2486
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_irq.h94
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_jiffies.h43
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_limits.h150
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_params.c242
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_params.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c2585
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.h308
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_map.c1933
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_map.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_well.c2069
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power_well.h181
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reg_defs.h49
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_regs.h2934
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reset.c127
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_reset.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rpm.c62
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rpm.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rps.c108
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_rps.h46
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_snapshot.c79
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_snapshot.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_trace.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_trace.h858
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h2270
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_utils.c32
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_utils.h31
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_wa.c79
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_wa.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy.c117
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_dkl_phy_regs.h207
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc.c1757
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc.h51
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_regs.h587
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_wl.c506
-rw-r--r--drivers/gpu/drm/i915/display/intel_dmc_wl.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c7070
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.h229
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux.c915
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c714
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_backlight.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_aux_regs.h106
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.c908
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.c2097
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_link_training.h58
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c2157
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_test.c764
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_test.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_tunnel.c814
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_tunnel.h133
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpio_phy.c1202
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpio_phy.h181
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll.c2355
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll.h52
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.c4731
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.h453
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.c326
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.h26
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt_common.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt_common.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.c421
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.c1037
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.h77
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_buffer.c82
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_buffer.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb_regs.h100
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi.c132
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi.h175
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c206
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.h13
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt.c956
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsi_vbt_defs.h197
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo.c574
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo.h19
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo_dev.h116
-rw-r--r--drivers/gpu/drm/i915/display/intel_dvo_regs.h54
-rw-r--r--drivers/gpu/drm/i915/display/intel_encoder.c123
-rw-r--r--drivers/gpu/drm/i915/display/intel_encoder.h26
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb.c2397
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb.h127
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_bo.c101
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_bo.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.c356
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c2331
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.h59
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc_regs.h129
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c567
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.h48
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev_fb.c106
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev_fb.h23
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi.c1119
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi.h48
-rw-r--r--drivers/gpu/drm/i915/display/intel_fdi_regs.h151
-rw-r--r--drivers/gpu/drm/i915/display/intel_fifo_underrun.c477
-rw-r--r--drivers/gpu/drm/i915/display/intel_fifo_underrun.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_fixed.h148
-rw-r--r--drivers/gpu/drm/i915/display/intel_flipq.c472
-rw-r--r--drivers/gpu/drm/i915/display/intel_flipq.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.c256
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.h141
-rw-r--r--drivers/gpu/drm/i915/display/intel_global_state.c417
-rw-r--r--drivers/gpu/drm/i915/display/intel_global_state.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus.c1035
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus.h50
-rw-r--r--drivers/gpu/drm/i915/display/intel_gmbus_regs.h81
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.c2887
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.h50
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc.c239
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc.h22
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.c672
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_gsc_message.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_regs.h272
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp_shim.h137
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c3365
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.h74
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.c1344
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.h38
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug_irq.c1495
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug_irq.h35
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti.c43
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti.h18
-rw-r--r--drivers/gpu/drm/i915/display/intel_hti_regs.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_link_bw.c497
-rw-r--r--drivers/gpu/drm/i915/display/intel_link_bw.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_load_detect.c225
-rw-r--r--drivers/gpu/drm/i915/display/intel_load_detect.h20
-rw-r--r--drivers/gpu/drm/i915/display/intel_lpe_audio.c368
-rw-r--r--drivers/gpu/drm/i915/display/intel_lpe_audio.h40
-rw-r--r--drivers/gpu/drm/i915/display/intel_lspcon.c756
-rw-r--r--drivers/gpu/drm/i915/display/intel_lspcon.h40
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy.c2327
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy.h47
-rw-r--r--drivers/gpu/drm/i915/display/intel_lt_phy_regs.h90
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds.c1016
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds.h41
-rw-r--r--drivers/gpu/drm/i915/display/intel_lvds_regs.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_mg_phy_regs.h282
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_lock.c50
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_lock.h33
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_setup.c1012
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_setup.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_verify.c258
-rw-r--r--drivers/gpu/drm/i915/display/intel_modeset_verify.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_opregion.c1306
-rw-r--r--drivers/gpu/drm/i915/display/intel_opregion.h145
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.c1556
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.h75
-rw-r--r--drivers/gpu/drm/i915/display/intel_panel.c596
-rw-r--r--drivers/gpu/drm/i915/display/intel_panel.h60
-rw-r--r--drivers/gpu/drm/i915/display/intel_panic.c27
-rw-r--r--drivers/gpu/drm/i915/display/intel_panic.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch.c340
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch.h56
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_display.c644
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_display.h98
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_refclk.c684
-rw-r--r--drivers/gpu/drm/i915/display/intel_pch_refclk.h45
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit.c731
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit.h29
-rw-r--r--drivers/gpu/drm/i915/display/intel_pfit_regs.h79
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc.c673
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc.h37
-rw-r--r--drivers/gpu/drm/i915/display/intel_pipe_crc_regs.h152
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.c1766
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane.h91
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane_initial.c440
-rw-r--r--drivers/gpu/drm/i915/display/intel_plane_initial.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_pmdemand.c675
-rw-r--r--drivers/gpu/drm/i915/display/intel_pmdemand.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps.c1899
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_pps_regs.h77
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c4542
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.h90
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr_regs.h342
-rw-r--r--drivers/gpu/drm/i915/display/intel_qp_tables.c475
-rw-r--r--drivers/gpu/drm/i915/display/intel_qp_tables.h14
-rw-r--r--drivers/gpu/drm/i915/display/intel_quirks.c309
-rw-r--r--drivers/gpu/drm/i915/display/intel_quirks.h32
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi.c92
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi.h27
-rw-r--r--drivers/gpu/drm/i915/display/intel_sbi_regs.h65
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.c3518
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.h35
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo_regs.h741
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c366
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.h17
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy.c2028
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy.h38
-rw-r--r--drivers/gpu/drm/i915/display/intel_snps_phy_regs.h75
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.c1733
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_regs.h388
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_uapi.c128
-rw-r--r--drivers/gpu/drm/i915/display/intel_sprite_uapi.h15
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.c2031
-rw-r--r--drivers/gpu/drm/i915/display/intel_tc.h119
-rw-r--r--drivers/gpu/drm/i915/display/intel_tdf.h25
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv.c2023
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv.h19
-rw-r--r--drivers/gpu/drm/i915/display/intel_tv_regs.h490
-rw-r--r--drivers/gpu/drm/i915/display/intel_vblank.c781
-rw-r--r--drivers/gpu/drm/i915/display/intel_vblank.h53
-rw-r--r--drivers/gpu/drm/i915/display/intel_vbt_defs.h1670
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc.c1103
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc.h39
-rw-r--r--drivers/gpu/drm/i915/display/intel_vdsc_regs.h354
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga.c124
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga.h16
-rw-r--r--drivers/gpu/drm/i915/display/intel_vga_regs.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.c900
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.h46
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr_regs.h126
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm.c404
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm.h36
-rw-r--r--drivers/gpu/drm/i915/display/intel_wm_types.h76
-rw-r--r--drivers/gpu/drm/i915/display/skl_prefill.c157
-rw-r--r--drivers/gpu/drm/i915/display/skl_prefill.h46
-rw-r--r--drivers/gpu/drm/i915/display/skl_scaler.c1178
-rw-r--r--drivers/gpu/drm/i915/display/skl_scaler.h65
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c3247
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.h46
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane_regs.h585
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark.c4108
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark.h86
-rw-r--r--drivers/gpu/drm/i915/display/skl_watermark_regs.h87
-rw-r--r--drivers/gpu/drm/i915/display/vlv_clock.c88
-rw-r--r--drivers/gpu/drm/i915/display/vlv_clock.h38
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dpio_phy_regs.h309
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.c2058
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi.h31
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll.c623
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll.h52
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_pll_regs.h109
-rw-r--r--drivers/gpu/drm/i915/display/vlv_dsi_regs.h475
-rw-r--r--drivers/gpu/drm/i915/display/vlv_sideband.c50
-rw-r--r--drivers/gpu/drm/i915/display/vlv_sideband.h156
-rw-r--r--drivers/gpu/drm/i915/dvo.h144
-rw-r--r--drivers/gpu/drm/i915/dvo_ch7xxx.c331
-rw-r--r--drivers/gpu/drm/i915/dvo_ivch.c421
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_busy.c163
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.c137
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.h19
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c2723
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.h247
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context_types.h422
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_create.c499
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_create.h17
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c347
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h18
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c770
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.h15
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c3646
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.c198
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.h23
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ioctls.h51
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_lmem.c119
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_lmem.h36
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c1156
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.h32
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c1001
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h874
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.c103
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h84
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h746
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c805
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_phys.c255
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.c265
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.h25
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_region.c208
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_region.h67
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c678
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c597
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.h33
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c1088
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.h50
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_throttle.c101
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.c472
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_tiling.h20
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c1381
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.h107
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c757
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h42
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c233
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h26
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c556
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_wait.c296
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gemfs.c71
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gemfs.h14
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c131
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h33
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c2034
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c746
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c434
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c1916
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c560
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c528
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c1860
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c99
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c87
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c157
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h46
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_context.c142
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_context.h30
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c133
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h22
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h16
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.c314
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.h36
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_engine_cs.c456
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_engine_cs.h39
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.c469
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_ppgtt.h77
-rw-r--r--drivers/gpu/drm/i915/gt/gen6_renderstate.c297
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.c454
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderclear.h15
-rw-r--r--drivers/gpu/drm/i915/gt/gen7_renderstate.c261
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c855
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h142
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c1085
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.h21
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_renderstate.c965
-rw-r--r--drivers/gpu/drm/i915/gt/gen9_renderstate.c981
-rw-r--r--drivers/gpu/drm/i915/gt/hsw_clear_kernel.c61
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c526
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.h63
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h56
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c650
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h409
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_param.h19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_sseu.c97
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h328
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h389
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c2563
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c419
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h26
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c328
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h112
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h269
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_stats.h61
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h712
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c328
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.h24
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c4176
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.h42
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c1682
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c931
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_fencing.h59
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c140
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h475
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.c321
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.h43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c1072
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h216
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c249
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h36
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c237
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.c122
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_debugfs.h53
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_defines.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c36
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.c556
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_irq.h65
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c870
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.h71
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c434
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h117
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c601
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c109
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_print.h57
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h1641
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c268
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.h32
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.c116
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.h31
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c930
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h324
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c737
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h707
-rw-r--r--drivers/gpu/drm/i915/gt/intel_hwconfig.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.c162
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_llc_types.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c1982
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h145
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h81
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c1164
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.h65
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate_types.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c689
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.h41
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c323
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c870
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.h32
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6_types.h43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c276
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.c254
-rw-r--r--drivers/gpu/drm/i915/gt/intel_renderstate.h52
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c1725
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h86
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset_types.h60
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c313
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.h141
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c1460
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_types.h51
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c2920
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h131
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps_types.h117
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sa_media.c48
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sa_media.h15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c893
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.h182
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c301
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu_debugfs.h17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c494
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.h104
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline_types.h92
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.c173
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.h29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_wopcm.c324
-rw-r--r--drivers/gpu/drm/i915/gt/intel_wopcm.h59
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c3130
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.h39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds_types.h37
-rw-r--r--drivers/gpu/drm/i915/gt/ivb_clear_kernel.c61
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.c445
-rw-r--r--drivers/gpu/drm/i915/gt/mock_engine.h32
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c455
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine.c27
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine.h13
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c443
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c319
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.h16
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_pm.c424
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c4508
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c213
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c2036
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_llc.c70
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_llc.h13
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c1994
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c1039
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c453
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c285
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.h12
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c394
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring.c110
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_ring_submission.c298
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c1327
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.h17
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_slpc.c557
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c1433
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c405
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c1379
-rw-r--r--drivers/gpu/drm/i915/gt/selftests/mock_timeline.c29
-rw-r--r--drivers/gpu/drm/i915/gt/selftests/mock_timeline.h17
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/README46
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/hsw.asm119
-rw-r--r--drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm117
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c173
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.h26
-rw-r--r--drivers/gpu/drm/i915/gt/st_shmem_utils.c63
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c538
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.h13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h217
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h275
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h170
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h49
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h60
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h113
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_messages_abi.h264
-rw-r--r--drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h226
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h135
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c524
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c429
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.h18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c358
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h85
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c39
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c231
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h93
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c978
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h554
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c1065
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h25
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c1654
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h35
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c1405
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h139
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c148
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c335
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h508
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c167
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c937
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h113
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c175
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.h15
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_print.h51
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c81
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_rc.h31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h158
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c907
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h51
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h52
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c6016
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h58
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c692
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h101
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c36
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c284
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_print.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c783
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.h121
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c65
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c1442
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h314
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h87
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c419
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c162
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c184
-rw-r--r--drivers/gpu/drm/i915/gvt/Makefile25
-rw-r--r--drivers/gpu/drm/i915/gvt/aperture_gm.c369
-rw-r--r--drivers/gpu/drm/i915/gvt/cfg_space.c402
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c3272
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.h58
-rw-r--r--drivers/gpu/drm/i915/gvt/debug.h65
-rw-r--r--drivers/gpu/drm/i915/gvt/debugfs.c231
-rw-r--r--drivers/gpu/drm/i915/gvt/display.c829
-rw-r--r--drivers/gpu/drm/i915/gvt/display.h169
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c594
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.h67
-rw-r--r--drivers/gpu/drm/i915/gvt/edid.c586
-rw-r--r--drivers/gpu/drm/i915/gvt/edid.h146
-rw-r--r--drivers/gpu/drm/i915/gvt/execlist.c558
-rw-r--r--drivers/gpu/drm/i915/gvt/execlist.h185
-rw-r--r--drivers/gpu/drm/i915/gvt/fb_decoder.c402
-rw-r--r--drivers/gpu/drm/i915/gvt/fb_decoder.h160
-rw-r--r--drivers/gpu/drm/i915/gvt/firmware.c253
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c2795
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.h293
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.h760
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c3251
-rw-r--r--drivers/gpu/drm/i915/gvt/interrupt.c735
-rw-r--r--drivers/gpu/drm/i915/gvt/interrupt.h211
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c1971
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio.c339
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio.h108
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c619
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.h63
-rw-r--r--drivers/gpu/drm/i915/gvt/opregion.c484
-rw-r--r--drivers/gpu/drm/i915/gvt/page_track.c179
-rw-r--r--drivers/gpu/drm/i915/gvt/page_track.h59
-rw-r--r--drivers/gpu/drm/i915/gvt/reg.h141
-rw-r--r--drivers/gpu/drm/i915/gvt/sched_policy.c480
-rw-r--r--drivers/gpu/drm/i915/gvt/sched_policy.h65
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c1789
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.h169
-rw-r--r--drivers/gpu/drm/i915/gvt/trace.h383
-rw-r--r--drivers/gpu/drm/i915/gvt/trace_points.c34
-rw-r--r--drivers/gpu/drm/i915/gvt/vgpu.c513
-rw-r--r--drivers/gpu/drm/i915/i915_active.c1189
-rw-r--r--drivers/gpu/drm/i915/i915_active.h237
-rw-r--r--drivers/gpu/drm/i915/i915_active_types.h47
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.c1625
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.h26
-rw-r--r--drivers/gpu/drm/i915/i915_config.c18
-rw-r--r--drivers/gpu/drm/i915/i915_config.h23
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c1666
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.h22
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs_params.c270
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs_params.h14
-rw-r--r--drivers/gpu/drm/i915/i915_deps.c237
-rw-r--r--drivers/gpu/drm/i915/i915_deps.h45
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c2297
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c1847
-rw-r--r--drivers/gpu/drm/i915/i915_driver.h33
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.c220
-rw-r--r--drivers/gpu/drm/i915/i915_drm_client.h92
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c806
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1778
-rw-r--r--drivers/gpu/drm/i915/i915_file_private.h111
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c4650
-rw-r--r--drivers/gpu/drm/i915/i915_gem.h139
-rw-r--r--drivers/gpu/drm/i915/i915_gem_debug.c203
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c593
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.h30
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c1346
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c359
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.h57
-rw-r--r--drivers/gpu/drm/i915/i915_gem_tiling.c489
-rw-r--r--drivers/gpu/drm/i915/i915_gem_ww.c63
-rw-r--r--drivers/gpu/drm/i915/i915_gem_ww.h41
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c210
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.h15
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c2628
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h424
-rw-r--r--drivers/gpu/drm/i915/i915_gtt_view_types.h59
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.c994
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.h27
-rw-r--r--drivers/gpu/drm/i915/i915_ioc32.c185
-rw-r--r--drivers/gpu/drm/i915/i915_ioc32.h17
-rw-r--r--drivers/gpu/drm/i915/i915_ioctl.c94
-rw-r--r--drivers/gpu/drm/i915/i915_ioctl.h14
-rw-r--r--drivers/gpu/drm/i915/i915_iosf_mbi.h36
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c2390
-rw-r--r--drivers/gpu/drm/i915/i915_irq.h54
-rw-r--r--drivers/gpu/drm/i915/i915_jiffies.h16
-rw-r--r--drivers/gpu/drm/i915/i915_list_util.h23
-rw-r--r--drivers/gpu/drm/i915/i915_mem.c387
-rw-r--r--drivers/gpu/drm/i915/i915_memcpy.c172
-rw-r--r--drivers/gpu/drm/i915/i915_memcpy.h34
-rw-r--r--drivers/gpu/drm/i915/i915_mitigations.c147
-rw-r--r--drivers/gpu/drm/i915/i915_mitigations.h13
-rw-r--r--drivers/gpu/drm/i915/i915_mm.c164
-rw-r--r--drivers/gpu/drm/i915/i915_mm.h36
-rw-r--r--drivers/gpu/drm/i915/i915_mmio_range.c18
-rw-r--r--drivers/gpu/drm/i915/i915_mmio_range.h19
-rw-r--r--drivers/gpu/drm/i915/i915_module.c129
-rw-r--r--drivers/gpu/drm/i915/i915_params.c229
-rw-r--r--drivers/gpu/drm/i915/i915_params.h81
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c1046
-rw-r--r--drivers/gpu/drm/i915/i915_pci.h18
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c5262
-rw-r--r--drivers/gpu/drm/i915/i915_perf.h62
-rw-r--r--drivers/gpu/drm/i915/i915_perf_oa_regs.h215
-rw-r--r--drivers/gpu/drm/i915/i915_perf_types.h511
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c1233
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h162
-rw-r--r--drivers/gpu/drm/i915/i915_priolist_types.h47
-rw-r--r--drivers/gpu/drm/i915/i915_ptr_util.h66
-rw-r--r--drivers/gpu/drm/i915/i915_pvinfo.h123
-rw-r--r--drivers/gpu/drm/i915/i915_query.c639
-rw-r--r--drivers/gpu/drm/i915/i915_query.h15
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h4091
-rw-r--r--drivers/gpu/drm/i915/i915_reg_defs.h229
-rw-r--r--drivers/gpu/drm/i915/i915_request.c2316
-rw-r--r--drivers/gpu/drm/i915/i915_request.h733
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.c242
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.h240
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c511
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.h109
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler_types.h205
-rw-r--r--drivers/gpu/drm/i915/i915_selftest.h142
-rw-r--r--drivers/gpu/drm/i915/i915_suspend.c893
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.c610
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence.h123
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.c99
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.h67
-rw-r--r--drivers/gpu/drm/i915/i915_switcheroo.c79
-rw-r--r--drivers/gpu/drm/i915/i915_switcheroo.h14
-rw-r--r--drivers/gpu/drm/i915/i915_syncmap.c409
-rw-r--r--drivers/gpu/drm/i915/i915_syncmap.h38
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.c200
-rw-r--r--drivers/gpu/drm/i915/i915_sysfs.h17
-rw-r--r--drivers/gpu/drm/i915/i915_tasklet.h43
-rw-r--r--drivers/gpu/drm/i915/i915_timer_util.c36
-rw-r--r--drivers/gpu/drm/i915/i915_timer_util.h23
-rw-r--r--drivers/gpu/drm/i915/i915_trace.h676
-rw-r--r--drivers/gpu/drm/i915/i915_trace_points.c3
-rw-r--r--drivers/gpu/drm/i915/i915_ttm_buddy_manager.c440
-rw-r--r--drivers/gpu/drm/i915/i915_ttm_buddy_manager.h72
-rw-r--r--drivers/gpu/drm/i915/i915_user_extensions.c61
-rw-r--r--drivers/gpu/drm/i915/i915_user_extensions.h20
-rw-r--r--drivers/gpu/drm/i915/i915_utils.c74
-rw-r--r--drivers/gpu/drm/i915/i915_utils.h133
-rw-r--r--drivers/gpu/drm/i915/i915_vgpu.c340
-rw-r--r--drivers/gpu/drm/i915/i915_vgpu.h42
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c2334
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h484
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.c425
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.h261
-rw-r--r--drivers/gpu/drm/i915/i915_vma_types.h255
-rw-r--r--drivers/gpu/drm/i915/i915_wait_util.h119
-rw-r--r--drivers/gpu/drm/i915/intel_acpi.c252
-rw-r--r--drivers/gpu/drm/i915/intel_bios.c682
-rw-r--r--drivers/gpu/drm/i915/intel_bios.h611
-rw-r--r--drivers/gpu/drm/i915/intel_clock_gating.c801
-rw-r--r--drivers/gpu/drm/i915/intel_clock_gating.h14
-rw-r--r--drivers/gpu/drm/i915/intel_cpu_info.c44
-rw-r--r--drivers/gpu/drm/i915/intel_cpu_info.h13
-rw-r--r--drivers/gpu/drm/i915/intel_crt.c607
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.c429
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h268
-rw-r--r--drivers/gpu/drm/i915/intel_display.c7660
-rw-r--r--drivers/gpu/drm/i915/intel_dp.c2004
-rw-r--r--drivers/gpu/drm/i915/intel_drv.h342
-rw-r--r--drivers/gpu/drm/i915/intel_dvo.c448
-rw-r--r--drivers/gpu/drm/i915/intel_fb.c276
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.c325
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.h78
-rw-r--r--drivers/gpu/drm/i915/intel_gvt_mmio_table.c1320
-rw-r--r--drivers/gpu/drm/i915/intel_hdmi.c455
-rw-r--r--drivers/gpu/drm/i915/intel_i2c.c473
-rw-r--r--drivers/gpu/drm/i915/intel_lvds.c1045
-rw-r--r--drivers/gpu/drm/i915/intel_mchbar_regs.h258
-rw-r--r--drivers/gpu/drm/i915/intel_memory_region.c430
-rw-r--r--drivers/gpu/drm/i915/intel_memory_region.h135
-rw-r--r--drivers/gpu/drm/i915/intel_modes.c112
-rw-r--r--drivers/gpu/drm/i915/intel_opregion.c516
-rw-r--r--drivers/gpu/drm/i915/intel_overlay.c1609
-rw-r--r--drivers/gpu/drm/i915/intel_panel.c307
-rw-r--r--drivers/gpu/drm/i915/intel_pci_config.h110
-rw-r--r--drivers/gpu/drm/i915/intel_pcode.c301
-rw-r--r--drivers/gpu/drm/i915/intel_pcode.h39
-rw-r--r--drivers/gpu/drm/i915/intel_region_ttm.c266
-rw-r--r--drivers/gpu/drm/i915/intel_region_ttm.h44
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c1352
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h200
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c548
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.h232
-rw-r--r--drivers/gpu/drm/i915/intel_sdvo.c2626
-rw-r--r--drivers/gpu/drm/i915/intel_sdvo_regs.h723
-rw-r--r--drivers/gpu/drm/i915/intel_step.c272
-rw-r--r--drivers/gpu/drm/i915/intel_step.h80
-rw-r--r--drivers/gpu/drm/i915/intel_tv.c1726
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c2902
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h518
-rw-r--r--drivers/gpu/drm/i915/intel_uncore_trace.c7
-rw-r--r--drivers/gpu/drm/i915/intel_uncore_trace.h49
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.c229
-rw-r--r--drivers/gpu/drm/i915/intel_wakeref.h355
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.c544
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.h38
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c143
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h15
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_42.h43
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_43.h61
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_cmd_interface_cmn.h42
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c91
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h21
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c448
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h45
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.c73
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.h13
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_irq.c109
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_irq.h40
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_pm.c80
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_pm.h38
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_regs.h27
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_session.c192
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_session.h25
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.c413
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.h22
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_types.h130
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_active.c354
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem.c262
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_evict.c571
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c1983
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_live_selftests.h55
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_mock_selftests.h35
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf.c445
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf_selftests.h21
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_random.c109
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_random.h65
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c3305
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_selftest.c518
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_sw_fence.c767
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_syncmap.c616
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_vma.c1112
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_atomic.c47
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_atomic.h17
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_flush_test.c51
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_flush_test.h14
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_live_test.c83
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_live_test.h36
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_mmap.c52
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_mmap.h21
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_reset.c51
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_reset.h18
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_spinner.c265
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_spinner.h43
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_memory_region.c1405
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c101
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h35
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_uncore.c355
-rw-r--r--drivers/gpu/drm/i915/selftests/lib_sw_fence.c136
-rw-r--r--drivers/gpu/drm/i915/selftests/lib_sw_fence.h43
-rw-r--r--drivers/gpu/drm/i915/selftests/librapl.c34
-rw-r--r--drivers/gpu/drm/i915/selftests/librapl.h17
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_drm.h45
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c283
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.h12
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.c136
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gtt.h37
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_region.c124
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_region.h22
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_request.c59
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_request.h37
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_uncore.c49
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_uncore.h34
-rw-r--r--drivers/gpu/drm/i915/selftests/scatterlist.c384
-rw-r--r--drivers/gpu/drm/i915/soc/intel_dram.c823
-rw-r--r--drivers/gpu/drm/i915/soc/intel_dram.h45
-rw-r--r--drivers/gpu/drm/i915/soc/intel_gmch.c188
-rw-r--r--drivers/gpu/drm/i915/soc/intel_gmch.h20
-rw-r--r--drivers/gpu/drm/i915/soc/intel_rom.c161
-rw-r--r--drivers/gpu/drm/i915/soc/intel_rom.h25
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb.c231
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb.h37
-rw-r--r--drivers/gpu/drm/i915/vlv_iosf_sb_reg.h180
-rw-r--r--drivers/gpu/drm/i915/vlv_suspend.c482
-rw-r--r--drivers/gpu/drm/i915/vlv_suspend.h18
-rw-r--r--drivers/gpu/drm/imagination/Kconfig20
-rw-r--r--drivers/gpu/drm/imagination/Makefile35
-rw-r--r--drivers/gpu/drm/imagination/pvr_ccb.c646
-rw-r--r--drivers/gpu/drm/imagination/pvr_ccb.h71
-rw-r--r--drivers/gpu/drm/imagination/pvr_cccb.c267
-rw-r--r--drivers/gpu/drm/imagination/pvr_cccb.h110
-rw-r--r--drivers/gpu/drm/imagination/pvr_context.c485
-rw-r--r--drivers/gpu/drm/imagination/pvr_context.h226
-rw-r--r--drivers/gpu/drm/imagination/pvr_debugfs.c52
-rw-r--r--drivers/gpu/drm/imagination/pvr_debugfs.h29
-rw-r--r--drivers/gpu/drm/imagination/pvr_device.c773
-rw-r--r--drivers/gpu/drm/imagination/pvr_device.h794
-rw-r--r--drivers/gpu/drm/imagination/pvr_device_info.c255
-rw-r--r--drivers/gpu/drm/imagination/pvr_device_info.h186
-rw-r--r--drivers/gpu/drm/imagination/pvr_drv.c1535
-rw-r--r--drivers/gpu/drm/imagination/pvr_drv.h128
-rw-r--r--drivers/gpu/drm/imagination/pvr_free_list.c624
-rw-r--r--drivers/gpu/drm/imagination/pvr_free_list.h195
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw.c1515
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw.h496
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_info.h135
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_meta.c561
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_meta.h14
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_mips.c199
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_mips.h49
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_riscv.c165
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_startstop.c323
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_startstop.h13
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_trace.c466
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_trace.h76
-rw-r--r--drivers/gpu/drm/imagination/pvr_fw_util.c66
-rw-r--r--drivers/gpu/drm/imagination/pvr_gem.c416
-rw-r--r--drivers/gpu/drm/imagination/pvr_gem.h172
-rw-r--r--drivers/gpu/drm/imagination/pvr_hwrt.c546
-rw-r--r--drivers/gpu/drm/imagination/pvr_hwrt.h166
-rw-r--r--drivers/gpu/drm/imagination/pvr_job.c780
-rw-r--r--drivers/gpu/drm/imagination/pvr_job.h161
-rw-r--r--drivers/gpu/drm/imagination/pvr_mmu.c2646
-rw-r--r--drivers/gpu/drm/imagination/pvr_mmu.h108
-rw-r--r--drivers/gpu/drm/imagination/pvr_params.c147
-rw-r--r--drivers/gpu/drm/imagination/pvr_params.h72
-rw-r--r--drivers/gpu/drm/imagination/pvr_power.c703
-rw-r--r--drivers/gpu/drm/imagination/pvr_power.h59
-rw-r--r--drivers/gpu/drm/imagination/pvr_queue.c1453
-rw-r--r--drivers/gpu/drm/imagination/pvr_queue.h173
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_cr_defs.h6308
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_cr_defs_client.h159
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_defs.h179
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif.h2188
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_check.h493
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_client.h373
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_client_check.h133
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_common.h60
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_dev_info.h113
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_resetframework.h28
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_sf.h1648
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_shared.h258
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_shared_check.h108
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_fwif_stream.h78
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_heap_config.h113
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_meta.h356
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mips.h335
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mips_check.h58
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_mmu_defs.h136
-rw-r--r--drivers/gpu/drm/imagination/pvr_rogue_riscv.h41
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream.c281
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream.h75
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream_defs.c351
-rw-r--r--drivers/gpu/drm/imagination/pvr_stream_defs.h16
-rw-r--r--drivers/gpu/drm/imagination/pvr_sync.c289
-rw-r--r--drivers/gpu/drm/imagination/pvr_sync.h84
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm.c1194
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm.h70
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm_mips.c237
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm_mips.h22
-rw-r--r--drivers/gpu/drm/imx/Kconfig6
-rw-r--r--drivers/gpu/drm/imx/Makefile6
-rw-r--r--drivers/gpu/drm/imx/dc/Kconfig13
-rw-r--r--drivers/gpu/drm/imx/dc/Makefile7
-rw-r--r--drivers/gpu/drm/imx/dc/dc-cf.c172
-rw-r--r--drivers/gpu/drm/imx/dc/dc-crtc.c555
-rw-r--r--drivers/gpu/drm/imx/dc/dc-de.c177
-rw-r--r--drivers/gpu/drm/imx/dc/dc-de.h59
-rw-r--r--drivers/gpu/drm/imx/dc/dc-drv.c293
-rw-r--r--drivers/gpu/drm/imx/dc/dc-drv.h102
-rw-r--r--drivers/gpu/drm/imx/dc/dc-ed.c288
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fg.c376
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fl.c185
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fu.c258
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fu.h129
-rw-r--r--drivers/gpu/drm/imx/dc/dc-fw.c222
-rw-r--r--drivers/gpu/drm/imx/dc/dc-ic.c282
-rw-r--r--drivers/gpu/drm/imx/dc/dc-kms.c143
-rw-r--r--drivers/gpu/drm/imx/dc/dc-kms.h131
-rw-r--r--drivers/gpu/drm/imx/dc/dc-lb.c325
-rw-r--r--drivers/gpu/drm/imx/dc/dc-pe.c158
-rw-r--r--drivers/gpu/drm/imx/dc/dc-pe.h101
-rw-r--r--drivers/gpu/drm/imx/dc/dc-plane.c224
-rw-r--r--drivers/gpu/drm/imx/dc/dc-tc.c141
-rw-r--r--drivers/gpu/drm/imx/dcss/Kconfig13
-rw-r--r--drivers/gpu/drm/imx/dcss/Makefile6
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-blkctl.c61
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-crtc.c222
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-ctxld.c416
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dev.c321
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dev.h178
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dpr.c547
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-drv.c128
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-dtg.c391
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-kms.c183
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-kms.h45
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-plane.c425
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-scaler.c840
-rw-r--r--drivers/gpu/drm/imx/dcss/dcss-ss.c174
-rw-r--r--drivers/gpu/drm/imx/ipuv3/Kconfig49
-rw-r--r--drivers/gpu/drm/imx/ipuv3/Makefile11
-rw-r--r--drivers/gpu/drm/imx/ipuv3/dw_hdmi-imx.c280
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-drm-core.c377
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-drm.h31
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-ldb.c646
-rw-r--r--drivers/gpu/drm/imx/ipuv3/imx-tve.c679
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c453
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c957
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-plane.h50
-rw-r--r--drivers/gpu/drm/imx/ipuv3/parallel-display.c288
-rw-r--r--drivers/gpu/drm/imx/lcdc/Kconfig10
-rw-r--r--drivers/gpu/drm/imx/lcdc/Makefile1
-rw-r--r--drivers/gpu/drm/imx/lcdc/imx-lcdc.c537
-rw-r--r--drivers/gpu/drm/ingenic/Kconfig40
-rw-r--r--drivers/gpu/drm/ingenic/Makefile4
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-drm-drv.c1677
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-drm.h232
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-dw-hdmi.c103
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-ipu.c997
-rw-r--r--drivers/gpu/drm/ingenic/ingenic-ipu.h109
-rw-r--r--drivers/gpu/drm/kmb/Kconfig15
-rw-r--r--drivers/gpu/drm/kmb/Makefile2
-rw-r--r--drivers/gpu/drm/kmb/kmb_crtc.c248
-rw-r--r--drivers/gpu/drm/kmb/kmb_drv.c637
-rw-r--r--drivers/gpu/drm/kmb/kmb_drv.h105
-rw-r--r--drivers/gpu/drm/kmb/kmb_dsi.c1566
-rw-r--r--drivers/gpu/drm/kmb/kmb_dsi.h387
-rw-r--r--drivers/gpu/drm/kmb/kmb_plane.c641
-rw-r--r--drivers/gpu/drm/kmb/kmb_plane.h74
-rw-r--r--drivers/gpu/drm/kmb/kmb_regs.h728
-rw-r--r--drivers/gpu/drm/lib/drm_random.c44
-rw-r--r--drivers/gpu/drm/lib/drm_random.h28
-rw-r--r--drivers/gpu/drm/lima/Kconfig16
-rw-r--r--drivers/gpu/drm/lima/Makefile21
-rw-r--r--drivers/gpu/drm/lima/lima_bcast.c74
-rw-r--r--drivers/gpu/drm/lima/lima_bcast.h19
-rw-r--r--drivers/gpu/drm/lima/lima_ctx.c102
-rw-r--r--drivers/gpu/drm/lima/lima_ctx.h34
-rw-r--r--drivers/gpu/drm/lima/lima_devfreq.c232
-rw-r--r--drivers/gpu/drm/lima/lima_devfreq.h42
-rw-r--r--drivers/gpu/drm/lima/lima_device.c535
-rw-r--r--drivers/gpu/drm/lima/lima_device.h146
-rw-r--r--drivers/gpu/drm/lima/lima_dlbu.c73
-rw-r--r--drivers/gpu/drm/lima/lima_dlbu.h20
-rw-r--r--drivers/gpu/drm/lima/lima_drv.c503
-rw-r--r--drivers/gpu/drm/lima/lima_drv.h53
-rw-r--r--drivers/gpu/drm/lima/lima_dump.h77
-rw-r--r--drivers/gpu/drm/lima/lima_gem.c416
-rw-r--r--drivers/gpu/drm/lima/lima_gem.h47
-rw-r--r--drivers/gpu/drm/lima/lima_gp.c388
-rw-r--r--drivers/gpu/drm/lima/lima_gp.h18
-rw-r--r--drivers/gpu/drm/lima/lima_l2_cache.c100
-rw-r--r--drivers/gpu/drm/lima/lima_l2_cache.h16
-rw-r--r--drivers/gpu/drm/lima/lima_mmu.c172
-rw-r--r--drivers/gpu/drm/lima/lima_mmu.h19
-rw-r--r--drivers/gpu/drm/lima/lima_pmu.c132
-rw-r--r--drivers/gpu/drm/lima/lima_pmu.h14
-rw-r--r--drivers/gpu/drm/lima/lima_pp.c503
-rw-r--r--drivers/gpu/drm/lima/lima_pp.h23
-rw-r--r--drivers/gpu/drm/lima/lima_regs.h299
-rw-r--r--drivers/gpu/drm/lima/lima_sched.c561
-rw-r--r--drivers/gpu/drm/lima/lima_sched.h114
-rw-r--r--drivers/gpu/drm/lima/lima_trace.c7
-rw-r--r--drivers/gpu/drm/lima/lima_trace.h48
-rw-r--r--drivers/gpu/drm/lima/lima_vm.c322
-rw-r--r--drivers/gpu/drm/lima/lima_vm.h64
-rw-r--r--drivers/gpu/drm/logicvc/Kconfig12
-rw-r--r--drivers/gpu/drm/logicvc/Makefile9
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_crtc.c280
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_crtc.h21
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_drm.c505
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_drm.h67
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_interface.c213
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_interface.h28
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_layer.c630
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_layer.h64
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_mode.c76
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_mode.h15
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_of.c185
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_of.h46
-rw-r--r--drivers/gpu/drm/logicvc/logicvc_regs.h80
-rw-r--r--drivers/gpu/drm/loongson/Kconfig20
-rw-r--r--drivers/gpu/drm/loongson/Makefile22
-rw-r--r--drivers/gpu/drm/loongson/loongson_device.c102
-rw-r--r--drivers/gpu/drm/loongson/loongson_module.c33
-rw-r--r--drivers/gpu/drm/loongson/loongson_module.h12
-rw-r--r--drivers/gpu/drm/loongson/lsdc_benchmark.c134
-rw-r--r--drivers/gpu/drm/loongson/lsdc_benchmark.h13
-rw-r--r--drivers/gpu/drm/loongson/lsdc_crtc.c1026
-rw-r--r--drivers/gpu/drm/loongson/lsdc_debugfs.c111
-rw-r--r--drivers/gpu/drm/loongson/lsdc_drv.c463
-rw-r--r--drivers/gpu/drm/loongson/lsdc_drv.h388
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gem.c290
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gem.h37
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gfxpll.c199
-rw-r--r--drivers/gpu/drm/loongson/lsdc_gfxpll.h52
-rw-r--r--drivers/gpu/drm/loongson/lsdc_i2c.c179
-rw-r--r--drivers/gpu/drm/loongson/lsdc_i2c.h29
-rw-r--r--drivers/gpu/drm/loongson/lsdc_irq.c75
-rw-r--r--drivers/gpu/drm/loongson/lsdc_irq.h16
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output.h21
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output_7a1000.c178
-rw-r--r--drivers/gpu/drm/loongson/lsdc_output_7a2000.c552
-rw-r--r--drivers/gpu/drm/loongson/lsdc_pixpll.c484
-rw-r--r--drivers/gpu/drm/loongson/lsdc_pixpll.h86
-rw-r--r--drivers/gpu/drm/loongson/lsdc_plane.c794
-rw-r--r--drivers/gpu/drm/loongson/lsdc_probe.c56
-rw-r--r--drivers/gpu/drm/loongson/lsdc_probe.h12
-rw-r--r--drivers/gpu/drm/loongson/lsdc_regs.h406
-rw-r--r--drivers/gpu/drm/loongson/lsdc_ttm.c589
-rw-r--r--drivers/gpu/drm/loongson/lsdc_ttm.h99
-rw-r--r--drivers/gpu/drm/mcde/Kconfig18
-rw-r--r--drivers/gpu/drm/mcde/Makefile3
-rw-r--r--drivers/gpu/drm/mcde/mcde_clk_div.c196
-rw-r--r--drivers/gpu/drm/mcde/mcde_display.c1523
-rw-r--r--drivers/gpu/drm/mcde/mcde_display_regs.h605
-rw-r--r--drivers/gpu/drm/mcde/mcde_drm.h118
-rw-r--r--drivers/gpu/drm/mcde/mcde_drv.c521
-rw-r--r--drivers/gpu/drm/mcde/mcde_dsi.c1233
-rw-r--r--drivers/gpu/drm/mcde/mcde_dsi_regs.h385
-rw-r--r--drivers/gpu/drm/mediatek/Kconfig59
-rw-r--r--drivers/gpu/drm/mediatek/Makefile31
-rw-r--r--drivers/gpu/drm/mediatek/mtk_cec.c254
-rw-r--r--drivers/gpu/drm/mediatek/mtk_cec.h18
-rw-r--r--drivers/gpu/drm/mediatek/mtk_crtc.c1186
-rw-r--r--drivers/gpu/drm/mediatek/mtk_crtc.h29
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ddp_comp.c710
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ddp_comp.h365
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_aal.c226
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ccorr.c196
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_color.c165
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_drv.h179
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_gamma.c335
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_merge.c376
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl.c768
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_ovl_adaptor.c679
-rw-r--r--drivers/gpu/drm/mediatek/mtk_disp_rdma.c423
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dp.c2951
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dp_reg.h362
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dpi.c1363
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dpi_regs.h247
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.c1315
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_drv.h85
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dsi.c1325
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ethdr.c396
-rw-r--r--drivers/gpu/drm/mediatek/mtk_ethdr.h26
-rw-r--r--drivers/gpu/drm/mediatek/mtk_gem.c289
-rw-r--r--drivers/gpu/drm/mediatek/mtk_gem.h48
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi.c1249
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_common.c456
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_common.h198
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c343
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc_v2.c396
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_regs.h230
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_regs_v2.h263
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_v2.c1521
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mdp_rdma.c347
-rw-r--r--drivers/gpu/drm/mediatek/mtk_mdp_rdma.h20
-rw-r--r--drivers/gpu/drm/mediatek/mtk_padding.c154
-rw-r--r--drivers/gpu/drm/mediatek/mtk_plane.c367
-rw-r--r--drivers/gpu/drm/mediatek/mtk_plane.h54
-rw-r--r--drivers/gpu/drm/meson/Kconfig29
-rw-r--r--drivers/gpu/drm/meson/Makefile9
-rw-r--r--drivers/gpu/drm/meson/meson_crtc.c719
-rw-r--r--drivers/gpu/drm/meson/meson_crtc.h20
-rw-r--r--drivers/gpu/drm/meson/meson_drv.c575
-rw-r--r--drivers/gpu/drm/meson/meson_drv.h185
-rw-r--r--drivers/gpu/drm/meson/meson_dw_hdmi.c870
-rw-r--r--drivers/gpu/drm/meson/meson_dw_hdmi.h160
-rw-r--r--drivers/gpu/drm/meson/meson_dw_mipi_dsi.c358
-rw-r--r--drivers/gpu/drm/meson/meson_dw_mipi_dsi.h160
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_cvbs.c298
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_cvbs.h30
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_dsi.c171
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_dsi.h13
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_hdmi.c496
-rw-r--r--drivers/gpu/drm/meson/meson_encoder_hdmi.h13
-rw-r--r--drivers/gpu/drm/meson/meson_osd_afbcd.c402
-rw-r--r--drivers/gpu/drm/meson/meson_osd_afbcd.h29
-rw-r--r--drivers/gpu/drm/meson/meson_overlay.c867
-rw-r--r--drivers/gpu/drm/meson/meson_overlay.h14
-rw-r--r--drivers/gpu/drm/meson/meson_plane.c572
-rw-r--r--drivers/gpu/drm/meson/meson_plane.h18
-rw-r--r--drivers/gpu/drm/meson/meson_rdma.c135
-rw-r--r--drivers/gpu/drm/meson/meson_rdma.h21
-rw-r--r--drivers/gpu/drm/meson/meson_registers.h1910
-rw-r--r--drivers/gpu/drm/meson/meson_vclk.c1131
-rw-r--r--drivers/gpu/drm/meson/meson_vclk.h37
-rw-r--r--drivers/gpu/drm/meson/meson_venc.c2000
-rw-r--r--drivers/gpu/drm/meson/meson_venc.h79
-rw-r--r--drivers/gpu/drm/meson/meson_viu.c506
-rw-r--r--drivers/gpu/drm/meson/meson_viu.h72
-rw-r--r--drivers/gpu/drm/meson/meson_vpp.c165
-rw-r--r--drivers/gpu/drm/meson/meson_vpp.h30
-rw-r--r--drivers/gpu/drm/mga/Makefile11
-rw-r--r--drivers/gpu/drm/mga/mga_dma.c1154
-rw-r--r--drivers/gpu/drm/mga/mga_drv.c142
-rw-r--r--drivers/gpu/drm/mga/mga_drv.h685
-rw-r--r--drivers/gpu/drm/mga/mga_ioc32.c226
-rw-r--r--drivers/gpu/drm/mga/mga_irq.c174
-rw-r--r--drivers/gpu/drm/mga/mga_state.c1103
-rw-r--r--drivers/gpu/drm/mga/mga_warp.c169
-rw-r--r--drivers/gpu/drm/mgag200/Kconfig24
-rw-r--r--drivers/gpu/drm/mgag200/Makefile19
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_bmc.c99
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_ddc.c178
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_ddc.h11
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.c316
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.h433
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200.c408
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh.c280
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh3.c185
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200eh5.c205
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200er.c313
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200ev.c318
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200ew3.c203
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200se.c518
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_g200wb.c327
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_mode.c817
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_reg.h698
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_vga.c73
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_vga_bmc.c157
-rw-r--r--drivers/gpu/drm/msm/.gitignore1
-rw-r--r--drivers/gpu/drm/msm/Kconfig203
-rw-r--r--drivers/gpu/drm/msm/Makefile228
-rw-r--r--drivers/gpu/drm/msm/NOTES87
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_catalog.c48
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpu.c562
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpu.h28
-rw-r--r--drivers/gpu/drm/msm/adreno/a2xx_gpummu.c122
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_catalog.c88
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.c617
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.h28
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_catalog.c48
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.c730
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.h25
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_catalog.c153
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_debugfs.c159
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c1809
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.h177
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_power.c390
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_preempt.c329
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_catalog.c1961
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.c2354
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gmu.h239
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c2833
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.h331
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c2086
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h821
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_hfi.c1087
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_hfi.h227
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_preempt.c477
-rw-r--r--drivers/gpu/drm/msm/adreno/a8xx_gpu.c1201
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c428
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_0_0_snapshot.h935
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_2_0_snapshot.h755
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gen7_9_0_snapshot.h1446
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c1258
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.h752
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_10_0_sm8650.h453
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_12_0_sm8750.h494
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_12_2_glymur.h541
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h205
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h184
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h212
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h328
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_0_msm8998.h318
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h282
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h220
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_0_sdm845.h333
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_4_1_sdm670.h152
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h389
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h413
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_2_sm7150.h317
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h254
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h225
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_0_sm8250.h388
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_2_sc7180.h219
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_3_sm6115.h152
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_4_sm6350.h237
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_5_qcm2290.h145
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_6_9_sm6375.h163
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_0_sm8350.h400
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_7_2_sc7280.h261
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h429
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_1_sm8450.h413
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_4_sa8775p.h453
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_0_sm8550.h408
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_1_sar2130p.h408
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h449
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.h34
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c515
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h76
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c1886
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h272
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c2936
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h101
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h356
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c777
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c789
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c697
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c386
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_formats.h41
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c729
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h784
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.c257
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cdm.h134
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c870
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.h339
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.c75
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_cwb.h70
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c216
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.h79
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc_1_2.c396
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c97
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.h86
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c728
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h75
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.c613
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_intf.h139
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c392
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.h119
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h421
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.c67
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_merge3d.h52
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c332
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h126
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c712
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h335
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c319
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.h165
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c603
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h375
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c246
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.h112
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c249
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h83
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hwio.h73
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c1562
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h172
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c1958
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h78
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c1055
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h129
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h1012
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c351
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.h53
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c144
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.h32
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/msm_media_info.h1155
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c665
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c153
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_dtv_encoder.c190
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_irq.c110
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c588
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.h207
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c396
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_pll.c183
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c436
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c1488
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.h136
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cmd_encoder.c161
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c1360
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.c752
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_ctl.h77
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_encoder.c307
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_irq.c123
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c930
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h317
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c164
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.h36
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c171
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.h46
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c1040
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c389
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.h88
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_format.c641
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_format.h77
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_kms.c138
-rw-r--r--drivers/gpu/drm/msm/disp/mdp_kms.h126
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot.c138
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot.h145
-rw-r--r--drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c193
-rw-r--r--drivers/gpu/drm/msm/dp/dp_audio.c379
-rw-r--r--drivers/gpu/drm/msm/dp/dp_audio.h57
-rw-r--r--drivers/gpu/drm/msm/dp/dp_aux.c729
-rw-r--r--drivers/gpu/drm/msm/dp/dp_aux.h33
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.c2770
-rw-r--r--drivers/gpu/drm/msm/dp/dp_ctrl.h49
-rw-r--r--drivers/gpu/drm/msm/dp/dp_debug.c240
-rw-r--r--drivers/gpu/drm/msm/dp/dp_debug.h48
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c1800
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.h38
-rw-r--r--drivers/gpu/drm/msm/dp/dp_drm.c381
-rw-r--r--drivers/gpu/drm/msm/dp/dp_drm.h45
-rw-r--r--drivers/gpu/drm/msm/dp/dp_link.c1350
-rw-r--r--drivers/gpu/drm/msm/dp/dp_link.h129
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.c685
-rw-r--r--drivers/gpu/drm/msm/dp/dp_panel.h96
-rw-r--r--drivers/gpu/drm/msm/dp/dp_reg.h366
-rw-r--r--drivers/gpu/drm/msm/dp/dp_utils.c88
-rw-r--r--drivers/gpu/drm/msm/dp/dp_utils.h36
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.c269
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi.h155
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.c351
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.h74
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_host.c2648
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_manager.c620
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.c866
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.h139
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c1036
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c1121
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c147
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c956
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c648
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c1506
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.c465
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi.h250
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_audio.c179
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_bridge.c521
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c1428
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_hpd.c215
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_i2c.c276
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy.c225
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c51
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c761
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8998.c765
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c141
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c44
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c460
-rw-r--r--drivers/gpu/drm/msm/msm_atomic.c324
-rw-r--r--drivers/gpu/drm/msm/msm_atomic_trace.h110
-rw-r--r--drivers/gpu/drm/msm/msm_atomic_tracepoints.c3
-rw-r--r--drivers/gpu/drm/msm/msm_debugfs.c420
-rw-r--r--drivers/gpu/drm/msm/msm_debugfs.h14
-rw-r--r--drivers/gpu/drm/msm/msm_drv.c1144
-rw-r--r--drivers/gpu/drm/msm/msm_drv.h560
-rw-r--r--drivers/gpu/drm/msm/msm_dsc_helper.h27
-rw-r--r--drivers/gpu/drm/msm/msm_fb.c292
-rw-r--r--drivers/gpu/drm/msm/msm_fbdev.c155
-rw-r--r--drivers/gpu/drm/msm/msm_fence.c201
-rw-r--r--drivers/gpu/drm/msm/msm_fence.h99
-rw-r--r--drivers/gpu/drm/msm/msm_gem.c1441
-rw-r--r--drivers/gpu/drm/msm/msm_gem.h501
-rw-r--r--drivers/gpu/drm/msm/msm_gem_prime.c134
-rw-r--r--drivers/gpu/drm/msm/msm_gem_shrinker.c314
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c854
-rw-r--r--drivers/gpu/drm/msm/msm_gem_vma.c1622
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c1152
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.h791
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_devfreq.c375
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_trace.h238
-rw-r--r--drivers/gpu/drm/msm/msm_gpu_tracepoints.c6
-rw-r--r--drivers/gpu/drm/msm/msm_io_utils.c161
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c799
-rw-r--r--drivers/gpu/drm/msm/msm_kms.c388
-rw-r--r--drivers/gpu/drm/msm/msm_kms.h243
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c605
-rw-r--r--drivers/gpu/drm/msm/msm_mmu.h95
-rw-r--r--drivers/gpu/drm/msm/msm_perf.c235
-rw-r--r--drivers/gpu/drm/msm/msm_rd.c436
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.c143
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.h140
-rw-r--r--drivers/gpu/drm/msm/msm_submitqueue.c352
-rw-r--r--drivers/gpu/drm/msm/msm_syncobj.c172
-rw-r--r--drivers/gpu/drm/msm/msm_syncobj.h37
-rw-r--r--drivers/gpu/drm/msm/registers/.gitignore4
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a2xx.xml1865
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a3xx.xml1751
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a4xx.xml2409
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a5xx.xml3039
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx.xml5021
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_descriptors.xml158
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_enums.xml429
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml267
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a6xx_perfcntrs.xml600
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a7xx_enums.xml216
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a7xx_perfcntrs.xml1030
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a8xx_descriptors.xml121
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/a8xx_enums.xml299
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/adreno_common.xml412
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml2445
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi.xml390
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_10nm.xml102
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_14nm.xml135
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_20nm.xml100
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_28nm.xml180
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_28nm_8960.xml134
-rw-r--r--drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml261
-rw-r--r--drivers/gpu/drm/msm/registers/display/edp.xml239
-rw-r--r--drivers/gpu/drm/msm/registers/display/hdmi.xml1104
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp4.xml504
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp5.xml790
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdp_common.xml90
-rw-r--r--drivers/gpu/drm/msm/registers/display/mdss.xml38
-rw-r--r--drivers/gpu/drm/msm/registers/display/msm.xml32
-rw-r--r--drivers/gpu/drm/msm/registers/display/sfpb.xml17
-rw-r--r--drivers/gpu/drm/msm/registers/freedreno_copyright.xml40
-rw-r--r--drivers/gpu/drm/msm/registers/gen_header.py1007
-rw-r--r--drivers/gpu/drm/msm/registers/rules-fd.xsd404
-rw-r--r--drivers/gpu/drm/mxsfb/Kconfig41
-rw-r--r--drivers/gpu/drm/mxsfb/Makefile5
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_drv.c381
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_drv.h42
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_kms.c780
-rw-r--r--drivers/gpu/drm/mxsfb/lcdif_regs.h265
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_drv.c435
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_drv.h63
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_kms.c723
-rw-r--r--drivers/gpu/drm/mxsfb/mxsfb_regs.h130
-rw-r--r--drivers/gpu/drm/nouveau/Kbuild74
-rw-r--r--drivers/gpu/drm/nouveau/Kconfig126
-rw-r--r--drivers/gpu/drm/nouveau/Makefile40
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/Kbuild15
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/arb.c265
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/crtc.c1350
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/cursor.c70
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/dac.c561
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/dfp.c724
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/disp.c307
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/disp.h186
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/hw.c836
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/hw.h408
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/Kbuild5
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_drv.c (renamed from drivers/gpu/drm/i2c/ch7006_drv.c)116
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_mode.c (renamed from drivers/gpu/drm/i2c/ch7006_mode.c)35
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/ch7006_priv.h (renamed from drivers/gpu/drm/i2c/ch7006_priv.h)26
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/i2c/sil164_drv.c (renamed from drivers/gpu/drm/i2c/sil164_drv.c)76
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/nouveau_i2c_encoder.c145
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/nvreg.h (renamed from drivers/gpu/drm/nouveau/nvreg.h)2
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/overlay.c519
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvmodesnv17.c590
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv04.c254
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv17.c832
-rw-r--r--drivers/gpu/drm/nouveau/dispnv04/tvnv17.h163
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/Kbuild65
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/atom.h267
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base.c53
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base.h27
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base507c.c340
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base827c.c104
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base907c.c216
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/base917c.c50
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core.c74
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core.h79
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core507d.c185
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core827d.c42
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core907d.c78
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/core917d.c44
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/corec37d.c180
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/corec57d.c82
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/coreca7d.c122
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc.c751
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc.h132
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crc907d.c142
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc37d.c127
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc37d.h40
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcc57d.c58
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/crcca7d.c98
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs.c55
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs.h14
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs507a.c205
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/curs907a.c30
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/cursc37a.c62
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/dac507d.c52
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/dac907d.c45
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c3073
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.h108
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/handles.h16
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.c633
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.h106
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head507d.c449
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head827d.c168
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head907d.c433
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head917d.c138
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headc37d.c300
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headc57d.c267
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/headca7d.c297
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/lut.c79
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/lut.h16
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/oimm.c51
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/oimm.h8
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/oimm507b.c52
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly.c57
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly.h26
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly507e.c182
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly827e.c120
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly907e.c96
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/ovly917e.c42
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/pior507d.c60
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sor507d.c59
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sor907d.c58
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/sorc37d.c54
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/tile.h63
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wimm.c50
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wimm.h8
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c93
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndw.c958
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndw.h142
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c387
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c243
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c106
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c242
-rw-r--r--drivers/gpu/drm/nouveau/gv100_fence.c98
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/ch7006.h87
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/encoder_i2c.h220
-rw-r--r--drivers/gpu/drm/nouveau/include/dispnv04/i2c/sil164.h64
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/acr.h237
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/flcn.h97
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/fw.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/hs.h61
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/ls.h104
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/pmu.h98
-rw-r--r--drivers/gpu/drm/nouveau/include/nvfw/sec2.h105
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl0039.h45
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl006c.h46
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl006e.h30
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl176e.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl206e.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl502d.h337
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl5039.h153
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl507a.h36
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl507c.h165
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl507d.h375
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl507e.h93
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl826f.h39
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl827c.h86
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl827d.h106
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl827e.h88
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl837d.h101
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl887d.h68
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl902d.h357
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl9039.h74
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl906f.h74
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl907c.h143
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl907d.h436
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl907e.h73
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h108
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/cla0b5.h162
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc36f.h137
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc37a.h31
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc37b.h49
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc37d.h567
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc37e.h394
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc57d.h355
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc57e.h142
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clc97b.h22
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clca7d.h868
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/class/clca7e.h137
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/drf.h208
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb100/dev_hshub_base.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb10b/dev_fbhub.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb202/dev_ce.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gb202/dev_therm.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_falcon_v4.h20
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_fb.h15
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_fsp_pri.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_mmu.h173
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_riscv_pri.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_therm.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/dev_xtl_ep_pri.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvhw/ref/gh100/pri_nv_xal_ep.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/chan.h76
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl0002.h67
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl0046.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl0080.h93
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/cl9097.h45
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/class.h295
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/clb069.h15
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/client.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/conn.h38
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/device.h25
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/disp.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/driver.h22
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/event.h36
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/fifo.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/head.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0000.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0001.h47
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0004.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0005.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0008.h42
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000a.h22
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000b.h11
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000c.h117
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000d.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if000e.h26
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0010.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0011.h33
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0012.h294
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0013.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0014.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0020.h45
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if0021.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if500b.h25
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if500d.h21
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if900b.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/if900d.h21
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/ifb00d.h27
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/ifc00d.h42
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/ioctl.h85
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/log.h51
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/mem.h22
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/mmu.h58
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/object.h126
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/os.h56
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/outp.h116
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/parent.h25
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/printf.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push.h369
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push006c.h73
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push206e.h13
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push507c.h25
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/push906f.h49
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/pushc37b.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/pushc97b.h18
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/timer.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/unpack.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/user.h20
-rw-r--r--drivers/gpu/drm/nouveau/include/nvif/vmm.h57
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/client.h40
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/debug.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/device.h164
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/engine.h55
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/enum.h22
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/event.h77
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/falcon.h217
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h81
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h43
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/intr.h73
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h8
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/layout.h54
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/memory.h124
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/mm.h78
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/object.h65
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/oclass.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/oproxy.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/option.h19
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/os.h33
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/pci.h14
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/ramht.h30
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h95
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h57
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/bsp.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/cipher.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h90
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h31
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h136
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h115
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h58
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/mpeg.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/msenc.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/mspdec.h9
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/msppp.h8
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/msvld.h10
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h16
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/sec.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h27
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/sw.h19
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/vic.h5
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/vp.h6
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/engine/xtensa.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/acr.h154
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h37
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h33
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0205.h30
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0209.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/P0260.h22
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bit.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/bmp.h38
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h28
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h48
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h27
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dcb.h68
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h41
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h36
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h31
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h7
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h52
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/i2c.h26
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/image.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/init.h37
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/mxm.h7
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/npde.h11
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pcir.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h44
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pll.h76
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h34
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h27
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h164
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/rammap.h26
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/therm.h73
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h25
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h38
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vpstate.h25
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/xpio.h19
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h26
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h138
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h35
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h41
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h182
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fsp.h24
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/fuse.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h41
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h497
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h180
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h19
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h40
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h48
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h34
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h170
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/mxm.h7
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h57
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h68
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/privring.h12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h119
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h83
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h33
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/vfn.h23
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/vga.h29
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h45
-rw-r--r--drivers/gpu/drm/nouveau/include/nvrm/nvtypes.h26
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.c890
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.h63
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.c408
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_acpi.h27
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_backlight.c400
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.c5718
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.h233
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c1851
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.h147
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo0039.c109
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo5039.c151
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo74c1.c54
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo85b5.c74
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo9039.c98
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo90b5.c67
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_boa0b5.c90
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_calc.c478
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_chan.c579
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_chan.h67
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_channel.c498
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c1466
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.h210
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_crtc.h32
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.c354
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_debugfs.h57
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.c936
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.h75
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.c216
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.h107
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c893
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.h50
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dp.c910
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c1515
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.c470
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h1818
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_encoder.h146
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.c408
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.h60
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fb.h48
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.c537
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fbcon.h69
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c834
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.h108
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c901
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.h42
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_grctx.h133
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_hw.c1085
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_hw.h474
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_hwmon.c725
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_hwmon.h43
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_i2c.c323
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_i2c.h53
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ioc32.c14
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ioctl.h8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_irq.c151
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_led.c140
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_led.h57
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c995
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.h43
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mm.c171
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mm.h65
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_notifier.c194
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_nvif.c100
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_object.c1084
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_perf.c205
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_platform.c107
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_platform.h27
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_pm.c552
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_pm.h74
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_prime.c128
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ramht.c309
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ramht.h55
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_reg.h26
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.c524
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.h118
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sgdma.c554
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_state.c1168
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c1087
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.h65
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_temp.c307
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ttm.c354
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_ttm.h27
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_util.c78
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_util.h49
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c2005
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.h99
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vga.c130
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vga.h8
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vm.c431
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vm.h114
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vmm.c141
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_vmm.h33
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_volt.c212
-rw-r--r--drivers/gpu/drm/nouveau/nv04_crtc.c1047
-rw-r--r--drivers/gpu/drm/nouveau/nv04_cursor.c71
-rw-r--r--drivers/gpu/drm/nouveau/nv04_dac.c544
-rw-r--r--drivers/gpu/drm/nouveau/nv04_dfp.c712
-rw-r--r--drivers/gpu/drm/nouveau/nv04_display.c281
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fb.c21
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fbcon.c275
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fence.c112
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fifo.c543
-rw-r--r--drivers/gpu/drm/nouveau/nv04_graph.c1314
-rw-r--r--drivers/gpu/drm/nouveau/nv04_instmem.c163
-rw-r--r--drivers/gpu/drm/nouveau/nv04_mc.c24
-rw-r--r--drivers/gpu/drm/nouveau/nv04_pm.c90
-rw-r--r--drivers/gpu/drm/nouveau/nv04_timer.c51
-rw-r--r--drivers/gpu/drm/nouveau/nv04_tv.c254
-rw-r--r--drivers/gpu/drm/nouveau/nv10_fb.c144
-rw-r--r--drivers/gpu/drm/nouveau/nv10_fence.c108
-rw-r--r--drivers/gpu/drm/nouveau/nv10_fence.h20
-rw-r--r--drivers/gpu/drm/nouveau/nv10_fifo.c246
-rw-r--r--drivers/gpu/drm/nouveau/nv10_gpio.c92
-rw-r--r--drivers/gpu/drm/nouveau/nv10_graph.c1175
-rw-r--r--drivers/gpu/drm/nouveau/nv17_fence.c141
-rw-r--r--drivers/gpu/drm/nouveau/nv17_tv.c827
-rw-r--r--drivers/gpu/drm/nouveau/nv17_tv.h161
-rw-r--r--drivers/gpu/drm/nouveau/nv17_tv_modes.c593
-rw-r--r--drivers/gpu/drm/nouveau/nv20_graph.c899
-rw-r--r--drivers/gpu/drm/nouveau/nv30_fb.c116
-rw-r--r--drivers/gpu/drm/nouveau/nv40_fb.c118
-rw-r--r--drivers/gpu/drm/nouveau/nv40_fifo.c305
-rw-r--r--drivers/gpu/drm/nouveau/nv40_graph.c539
-rw-r--r--drivers/gpu/drm/nouveau/nv40_grctx.c662
-rw-r--r--drivers/gpu/drm/nouveau/nv40_mc.c28
-rw-r--r--drivers/gpu/drm/nouveau/nv50_calc.c91
-rw-r--r--drivers/gpu/drm/nouveau/nv50_crtc.c789
-rw-r--r--drivers/gpu/drm/nouveau/nv50_cursor.c157
-rw-r--r--drivers/gpu/drm/nouveau/nv50_dac.c319
-rw-r--r--drivers/gpu/drm/nouveau/nv50_display.c988
-rw-r--r--drivers/gpu/drm/nouveau/nv50_display.h56
-rw-r--r--drivers/gpu/drm/nouveau/nv50_evo.c425
-rw-r--r--drivers/gpu/drm/nouveau/nv50_evo.h119
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fb.c294
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fbcon.c264
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fence.c92
-rw-r--r--drivers/gpu/drm/nouveau/nv50_fifo.c504
-rw-r--r--drivers/gpu/drm/nouveau/nv50_gpio.c302
-rw-r--r--drivers/gpu/drm/nouveau/nv50_graph.c1075
-rw-r--r--drivers/gpu/drm/nouveau/nv50_grctx.c3324
-rw-r--r--drivers/gpu/drm/nouveau/nv50_instmem.c428
-rw-r--r--drivers/gpu/drm/nouveau/nv50_mc.c40
-rw-r--r--drivers/gpu/drm/nouveau/nv50_pm.c131
-rw-r--r--drivers/gpu/drm/nouveau/nv50_sor.c341
-rw-r--r--drivers/gpu/drm/nouveau/nv50_vm.c184
-rw-r--r--drivers/gpu/drm/nouveau/nv50_vram.c211
-rw-r--r--drivers/gpu/drm/nouveau/nv84_crypt.c140
-rw-r--r--drivers/gpu/drm/nouveau/nv84_fence.c228
-rw-r--r--drivers/gpu/drm/nouveau/nva3_pm.c95
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fb.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fbcon.c269
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fence.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_fifo.c466
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_graph.c808
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_graph.h64
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_grctx.c2874
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_instmem.c231
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_vm.c130
-rw-r--r--drivers/gpu/drm/nouveau/nvc0_vram.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvif/Kbuild25
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan.c159
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan506f.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chan906f.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvif/chanc36f.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvif/client.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvif/conn.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvif/device.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvif/disp.c83
-rw-r--r--drivers/gpu/drm/nouveau/nvif/driver.c40
-rw-r--r--drivers/gpu/drm/nouveau/nvif/event.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvif/fifo.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvif/head.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvif/mem.c103
-rw-r--r--drivers/gpu/drm/nouveau/nvif/mmu.c136
-rw-r--r--drivers/gpu/drm/nouveau/nvif/object.c279
-rw-r--r--drivers/gpu/drm/nouveau/nvif/outp.c556
-rw-r--r--drivers/gpu/drm/nouveau/nvif/timer.c56
-rw-r--r--drivers/gpu/drm/nouveau/nvif/user.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvif/userc361.c47
-rw-r--r--drivers/gpu/drm/nouveau/nvif/vmm.c264
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/Kbuild6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/Kbuild17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/client.c126
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/engine.c189
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/enum.c56
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/event.c214
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/firmware.c309
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c278
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/intr.c442
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/ioctl.c313
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/memory.c165
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/mm.c307
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/object.c300
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/oproxy.c185
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/option.c139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/ramht.c162
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/subdev.c275
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/uevent.c157
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/Kbuild24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/bsp/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/com.fuc864
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h607
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h621
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c96
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gb202.c16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gf100.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gk104.c101
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gm107.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gm200.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c102
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gp102.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c83
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/gv100.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/ce/tu102.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/cipher/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c133
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/Kbuild7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/acpi.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/acpi.h9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/base.c3449
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c212
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.h13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c1724
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c352
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/user.c349
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c262
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/chan.c251
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/chan.h139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c85
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c675
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h102
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c349
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c379
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c155
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c1261
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c333
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c196
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c85
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c200
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c109
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c267
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c1254
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.c84
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmi.h16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h209
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/nv04.c130
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c1795
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c423
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h127
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h90
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c241
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c225
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c114
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c127
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c665
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/vga.c205
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c116
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/gf100.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/gf119.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/gv100.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/nv04.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/nv50.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/priv.h19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c138
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/usergf100.c150
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/usergf119.c132
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/usergv100.c119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv04.c134
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/dma/usernv50.c157
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/falcon.c355
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c395
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c255
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c482
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c111
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c231
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c626
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gb202.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c969
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c833
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c132
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c156
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c491
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c543
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c142
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c252
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c398
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h219
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/regsnv04.h133
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c430
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c290
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c125
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c418
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c209
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c1587
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h276
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c109
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c807
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c357
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c308
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c527
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c1002
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c867
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c107
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c572
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c90
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c992
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c130
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c151
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c125
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c222
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c693
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h131
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c3347
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/com.fuc335
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc492
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc342
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h532
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc342
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h539
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc342
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h539
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc342
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h539
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc542
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h475
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc542
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h607
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hub.fuc699
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc340
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h1049
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc340
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h1049
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc340
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h1046
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc340
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h1046
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc540
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h918
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc540
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h918
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/macros.fuc261
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/os.h9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c198
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c358
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c2660
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h448
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c161
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c160
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c136
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c203
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c227
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c506
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.h55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c402
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c154
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c211
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c346
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c446
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c296
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c188
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c165
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c161
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c102
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c103
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c342
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c1426
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c1221
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv15.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv17.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c376
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c135
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c126
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c200
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c476
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv44.c108
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c796
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h47
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/regs.h275
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c225
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/Kbuild6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/g84.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c298
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h32
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv40.c83
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c216
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv50.c136
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msenc/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mspdec/Kbuild6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mspdec/base.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mspdec/g98.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mspdec/gf100.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mspdec/gk104.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mspdec/gt215.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/mspdec/priv.h12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msppp/Kbuild5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msppp/base.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msppp/g98.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msppp/gf100.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msppp/gt215.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msppp/priv.h10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msvld/Kbuild7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msvld/base.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msvld/g98.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msvld/gf100.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msvld/gk104.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msvld/gt215.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msvld/mcp89.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/msvld/priv.h12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/ga102.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvdec/tu102.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/Kbuild4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/base.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/priv.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/nvenc/tu102.c34
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s698
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h585
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c164
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c203
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c318
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp108.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/r535.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c92
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/Kbuild10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c109
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c147
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv04.c139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv10.c68
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c142
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c86
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.h22
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h22
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/vic/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/vp/Kbuild2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/vp/g84.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/xtensa.c191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/base.c338
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/cmdq.c214
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/fw.c363
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/ga100.c68
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/ga102.c154
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c350
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/gp102.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/msgq.c213
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/priv.h13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/qmgr.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/qmgr.h88
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/tu102.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/falcon/v1.c101
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/Kbuild7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/acr.c205
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/flcn.c115
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/fw.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/hs.c101
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/nvfw/ls.c180
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/Kbuild12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c446
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga100.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga102.c330
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c374
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm20b.c140
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c288
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp108.c112
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp10b.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/gv100.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/lsfw.c397
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/priv.h118
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c208
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c150
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/g84.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.c196
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/gf100.h27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm107.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/gm20b.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c255
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/priv.h37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu102.c103
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/M0203.c129
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/M0205.c135
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/M0209.c135
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/P0260.c107
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c213
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/bit.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/boost.c126
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/conn.c97
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/cstep.c122
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/dcb.c235
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c174
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c232
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/extdev.c110
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/gpio.c150
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/i2c.c164
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c129
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c83
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c2347
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/mxm.c137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/npde.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/pcir.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/perf.c216
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c440
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c102
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c125
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/ramcfg.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c258
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c248
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c140
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c89
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowpci.c134
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c123
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowrom.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/therm.c212
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c173
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/vmap.c121
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c160
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/vpstate.c88
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/xpio.c74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/base.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/g94.c65
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/gf100.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c177
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h148
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv31.c89
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv50.c106
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bus/priv.h19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c716
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/g84.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c481
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c517
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c662
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h160
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.c320
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a_devfreq.h24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c1076
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.c185
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gp10b.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c550
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.h19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/mcp77.c422
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv04.c84
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv40.c233
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c563
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/pll.h12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/pllgt215.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/pllnv04.c245
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/clk/seq.h15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/base.c136
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/fbmem.h83
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/g84.c65
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/g98.c64
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c92
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c118
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c192
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gt215.c150
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gv100.c79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/mcp89.c65
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.c465
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv04.h23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv05.c143
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv10.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv1a.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv20.c79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c175
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c108
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c166
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c89
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp10b.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c246
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c190
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c110
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c302
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/g84.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c47
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb100.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gb202.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr5.c121
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c139
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gh100.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c90
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.h35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk110.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c40
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c71
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c92
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gt215.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/mcp77.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/mcp89.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv04.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv10.c70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv1a.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv20.c102
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv25.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv30.c133
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv35.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv36.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv40.c68
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv41.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv44.c71
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv46.c57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv47.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv49.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv4e.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c266
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h103
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/r535.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c263
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h76
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramfuc.h178
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c672
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c1716
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c1007
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c86
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv04.c65
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv10.c40
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv1a.c56
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv20.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.c223
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv40.h15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv41.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv44.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv49.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv4e.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c641
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/regsnv04.h23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c100
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c120
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c61
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/base.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gb100.c24
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gb202.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/gh100.c275
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fsp/priv.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fuse/Kbuild5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fuse/base.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fuse/gf100.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fuse/gm107.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fuse/nv50.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/fuse/priv.h13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/base.c237
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/g94.c75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c124
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/gf119.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/gk104.c80
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/nv10.c119
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/nv50.c133
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gpio/priv.h44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ad102.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c163
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/fwsec.c360
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga100.c75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c200
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb100.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gb202.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gh100.c358
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h92
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/Kbuild19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ad10x.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/client.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/engine.c189
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/engine.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ga100.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/ga1xx.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gb10x.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gb20x.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gh100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gpu.h70
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gr.c87
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/gr.h55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/handles.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/nvdec.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/nvenc.c33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/Kbuild25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/alloc.c112
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/bar.c202
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ce.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/client.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ctrl.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/device.c148
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/disp.c1793
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fbsr.c327
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c617
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gr.c356
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c2205
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvdec.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvenc.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvjpg.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/alloc.h36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/bar.h29
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ce.h15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/client.h20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ctrl.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/device.h30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/disp.h741
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/engine.h260
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/event.h47
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/fbsr.h106
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/fifo.h350
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/gr.h73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/gsp.h825
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/msgfn.h53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvdec.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvenc.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/nvjpg.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/ofa.h16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/rpcfn.h225
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/nvrm/vmm.h132
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/ofa.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rm.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c698
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/Kbuild9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/client.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/disp.c263
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/fbsr.c149
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/fifo.c217
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gr.c191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/gsp.c216
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/client.h21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/disp.h355
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/engine.h318
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/fbsr.h19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/fifo.h213
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/gr.h79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/gsp.h634
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/msgfn.h57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/ofa.h17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/nvrm/rpcfn.h249
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/ofa.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r570/rm.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rm.h191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/rpc.h18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/tu1xx.c38
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu102.c445
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/tu116.c61
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild33
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/anx9805.c278
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxch.c215
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxch.h46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c194
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgf119.c35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c188
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c417
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bit.c216
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.c264
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/bus.h39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/busgf119.c95
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/busnv04.c96
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/busnv4e.c86
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/busnv50.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/g94.c73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gf117.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gf119.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk104.c73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/nv04.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/nv4e.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/nv50.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.c116
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h68
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padg94.c76
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgf119.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgm200.c76
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padnv04.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padnv4e.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padnv50.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/Kbuild3
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c331
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/gf100.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/Kbuild7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c255
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gh100.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c607
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv04.c275
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv40.c257
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c450
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c147
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/ga102.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c257
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c153
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c65
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c147
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c81
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c82
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c40
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c107
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h59
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c441
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/g84.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gf100.c91
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gh100.c25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk104.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm200.c99
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c56
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c46
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.c242
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mem.h23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memgf100.c94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv04.c69
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/memnv50.c88
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv04.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv41.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv44.c73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/nv50.c78
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h74
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu102.c62
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c190
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h25
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c181
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.h14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c588
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c1979
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h387
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c426
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgh100.c306
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c104
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c73
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c187
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c72
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c649
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c51
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c89
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c141
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c112
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c230
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c387
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mxm/Kbuild4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c279
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.c191
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.h23
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c220
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mxm/priv.h16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/agp.c175
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/agp.h19
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c206
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c156
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c102
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gh100.c30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gk104.c228
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv04.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv40.c43
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv46.c50
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv4c.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/pcie.c164
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c167
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/arith.fuc94
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc370
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h1864
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc470
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h1794
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc570
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h1730
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc370
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h1867
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/host.fuc150
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/i2c_.fuc393
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/idle.fuc84
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/kernel.fuc544
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/macros.fuc272
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/memx.fuc447
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/os.h53
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/test.fuc63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf100.c76
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf119.c54
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c134
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk110.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c229
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm107.c56
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c85
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c270
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c63
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c96
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c294
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c204
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h71
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/Kbuild7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/gf100.c122
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/gf117.c47
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/gk104.c125
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/gk20a.c85
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/gm200.c41
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/gp10b.c55
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/privring/priv.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c455
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/fan.c279
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/fannil.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/fanpwm.c110
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/fantog.c116
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/g84.c247
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c58
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.h35
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c154
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gk104.c133
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gk104.h49
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm200.c39
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gp100.c61
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c75
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/ic.c127
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/nv40.c204
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/nv50.c176
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h137
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/therm/temp.c253
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/timer/Kbuild6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c198
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/timer/gk20a.c40
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv04.c152
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv40.c89
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/timer/nv41.c86
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/timer/priv.h27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/timer/regsnv04.h8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/Kbuild4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c161
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c124
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/Kbuild8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/base.c60
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/ga100.c52
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/gv100.c36
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/priv.h30
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/r535.c57
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/tu102.c113
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/vfn/uvfn.c67
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c328
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf100.c71
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c61
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c141
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c186
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h44
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/gpio.c98
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/nv40.c45
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h31
-rw-r--r--drivers/gpu/drm/nova/Kconfig16
-rw-r--r--drivers/gpu/drm/nova/Makefile3
-rw-r--r--drivers/gpu/drm/nova/driver.rs71
-rw-r--r--drivers/gpu/drm/nova/file.rs69
-rw-r--r--drivers/gpu/drm/nova/gem.rs47
-rw-r--r--drivers/gpu/drm/nova/nova.rs17
-rw-r--r--drivers/gpu/drm/omapdrm/Kconfig137
-rw-r--r--drivers/gpu/drm/omapdrm/Makefile35
-rw-r--r--drivers/gpu/drm/omapdrm/TODO23
-rw-r--r--drivers/gpu/drm/omapdrm/dss/base.c281
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dispc.c4776
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dispc.h909
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dispc_coefs.c312
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dpi.c746
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dsi.c5103
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dsi.h456
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dss.c1641
-rw-r--r--drivers/gpu/drm/omapdrm/dss/dss.h548
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi.h385
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4.c855
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c353
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_cec.h43
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_core.c888
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi4_core.h268
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5.c822
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5_core.c863
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi5_core.h295
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi_common.c149
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi_phy.c195
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi_pll.c187
-rw-r--r--drivers/gpu/drm/omapdrm/dss/hdmi_wp.c297
-rw-r--r--drivers/gpu/drm/omapdrm/dss/omapdss.h316
-rw-r--r--drivers/gpu/drm/omapdrm/dss/output.c134
-rw-r--r--drivers/gpu/drm/omapdrm/dss/pll.c572
-rw-r--r--drivers/gpu/drm/omapdrm/dss/sdi.c390
-rw-r--r--drivers/gpu/drm/omapdrm/dss/venc.c916
-rw-r--r--drivers/gpu/drm/omapdrm/dss/video-pll.c193
-rw-r--r--drivers/gpu/drm/omapdrm/omap_crtc.c849
-rw-r--r--drivers/gpu/drm/omapdrm/omap_crtc.h34
-rw-r--r--drivers/gpu/drm/omapdrm/omap_debugfs.c97
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_priv.h193
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_tiler.c1225
-rw-r--r--drivers/gpu/drm/omapdrm/omap_dmm_tiler.h133
-rw-r--r--drivers/gpu/drm/omapdrm/omap_drv.c901
-rw-r--r--drivers/gpu/drm/omapdrm/omap_drv.h112
-rw-r--r--drivers/gpu/drm/omapdrm/omap_encoder.c144
-rw-r--r--drivers/gpu/drm/omapdrm/omap_encoder.h19
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fb.c457
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fb.h38
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fbdev.c294
-rw-r--r--drivers/gpu/drm/omapdrm/omap_fbdev.h30
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.c1523
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.h86
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c146
-rw-r--r--drivers/gpu/drm/omapdrm/omap_irq.c303
-rw-r--r--drivers/gpu/drm/omapdrm/omap_irq.h29
-rw-r--r--drivers/gpu/drm/omapdrm/omap_overlay.c212
-rw-r--r--drivers/gpu/drm/omapdrm/omap_overlay.h35
-rw-r--r--drivers/gpu/drm/omapdrm/omap_plane.c591
-rw-r--r--drivers/gpu/drm/omapdrm/omap_plane.h27
-rw-r--r--drivers/gpu/drm/omapdrm/tcm-sita.c250
-rw-r--r--drivers/gpu/drm/omapdrm/tcm.h330
-rw-r--r--drivers/gpu/drm/panel/Kconfig1212
-rw-r--r--drivers/gpu/drm/panel/Makefile122
-rw-r--r--drivers/gpu/drm/panel/panel-abt-y030xx067a.c383
-rw-r--r--drivers/gpu/drm/panel/panel-arm-versatile.c375
-rw-r--r--drivers/gpu/drm/panel/panel-asus-z00t-tm5p5-n35596.c307
-rw-r--r--drivers/gpu/drm/panel/panel-auo-a030jtn01.c307
-rw-r--r--drivers/gpu/drm/panel/panel-boe-bf060y8m-aj0.c401
-rw-r--r--drivers/gpu/drm/panel/panel-boe-himax8279d.c915
-rw-r--r--drivers/gpu/drm/panel/panel-boe-td4320.c247
-rw-r--r--drivers/gpu/drm/panel/panel-boe-th101mb31ig002-28a.c438
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-ll2.c241
-rw-r--r--drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c1826
-rw-r--r--drivers/gpu/drm/panel/panel-dsi-cm.c650
-rw-r--r--drivers/gpu/drm/panel/panel-ebbg-ft8719.c247
-rw-r--r--drivers/gpu/drm/panel/panel-edp.c2217
-rw-r--r--drivers/gpu/drm/panel/panel-elida-kd35t133.c298
-rw-r--r--drivers/gpu/drm/panel/panel-feixin-k101-im2ba02.c513
-rw-r--r--drivers/gpu/drm/panel/panel-feiyang-fy07024di26a30d.c260
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx8279.c1296
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83102.c1088
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83112a.c347
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx83112b.c430
-rw-r--r--drivers/gpu/drm/panel/panel-himax-hx8394.c845
-rw-r--r--drivers/gpu/drm/panel/panel-hydis-hv101hd1.c188
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9322.c941
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9341.c588
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9805.c405
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9806e.c565
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9881c.c2484
-rw-r--r--drivers/gpu/drm/panel/panel-ilitek-ili9882t.c768
-rw-r--r--drivers/gpu/drm/panel/panel-innolux-ej030na.c309
-rw-r--r--drivers/gpu/drm/panel/panel-innolux-p079zca.c483
-rw-r--r--drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c1216
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-fhd-r63452.c244
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c475
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-lt070me05000.c475
-rw-r--r--drivers/gpu/drm/panel/panel-khadas-ts050.c896
-rw-r--r--drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c412
-rw-r--r--drivers/gpu/drm/panel/panel-leadtek-ltk050h3146w.c639
-rw-r--r--drivers/gpu/drm/panel/panel-leadtek-ltk500hd1829.c701
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lb035q02.c240
-rw-r--r--drivers/gpu/drm/panel/panel-lg-ld070wx3.c184
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lg4573.c294
-rw-r--r--drivers/gpu/drm/panel/panel-lg-sw43408.c320
-rw-r--r--drivers/gpu/drm/panel/panel-lincolntech-lcd197.c261
-rw-r--r--drivers/gpu/drm/panel/panel-lvds.c256
-rw-r--r--drivers/gpu/drm/panel/panel-magnachip-d53e6ea8966.c520
-rw-r--r--drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c335
-rw-r--r--drivers/gpu/drm/panel/panel-nec-nl8048hl11.c251
-rw-r--r--drivers/gpu/drm/panel/panel-newvision-nv3051d.c541
-rw-r--r--drivers/gpu/drm/panel/panel-newvision-nv3052c.c686
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35510.c1410
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35560.c480
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt35950.c608
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36523.c1272
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36672a.c684
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36672e.c608
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt37801.c340
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt39016.c358
-rw-r--r--drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c274
-rw-r--r--drivers/gpu/drm/panel/panel-orisetech-ota5601a.c360
-rw-r--r--drivers/gpu/drm/panel/panel-orisetech-otm8009a.c510
-rw-r--r--drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c208
-rw-r--r--drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c244
-rw-r--r--drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c511
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm67191.c633
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm67200.c493
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm68200.c403
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm692e5.c373
-rw-r--r--drivers/gpu/drm/panel/panel-raydium-rm69380.c309
-rw-r--r--drivers/gpu/drm/panel/panel-renesas-r61307.c325
-rw-r--r--drivers/gpu/drm/panel/panel-renesas-r69328.c281
-rw-r--r--drivers/gpu/drm/panel/panel-ronbo-rb070d30.c237
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ams581vf01.c283
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ams639rq08.c329
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-atna33xc20.c370
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-db7430.c348
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-ld9040.c422
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d16d0.c240
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d27a1.c318
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c492
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3fa7.c256
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3fc2x01.c385
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c783
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e3ha8.c342
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c523
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c140
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c88
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0.c739
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e63m0.h40
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams427ap24.c768
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e88a0-ams452ef01.c239
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c1061
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6e8aa5x01-ams561ra01.c981
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-sofef00.c293
-rw-r--r--drivers/gpu/drm/panel/panel-seiko-43wvf1g.c305
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-lq079l1sx01.c225
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c380
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls037v7dw01.c219
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c284
-rw-r--r--drivers/gpu/drm/panel/panel-sharp-ls060t1sx01.c278
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c5891
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7701.c1462
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7703.c940
-rw-r--r--drivers/gpu/drm/panel/panel-sitronix-st7789v.c697
-rw-r--r--drivers/gpu/drm/panel/panel-sony-acx565akm.c682
-rw-r--r--drivers/gpu/drm/panel/panel-sony-td4353-jdi.c257
-rw-r--r--drivers/gpu/drm/panel/panel-sony-tulip-truly-nt35521.c517
-rw-r--r--drivers/gpu/drm/panel/panel-startek-kd070fhfid015.c350
-rw-r--r--drivers/gpu/drm/panel/panel-summit.c134
-rw-r--r--drivers/gpu/drm/panel/panel-synaptics-r63353.c331
-rw-r--r--drivers/gpu/drm/panel/panel-synaptics-tddi.c277
-rw-r--r--drivers/gpu/drm/panel/panel-tdo-tl070wsh30.c225
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td028ttec1.c387
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td043mtea1.c504
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-tpg110.c486
-rw-r--r--drivers/gpu/drm/panel/panel-truly-nt35597.c631
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-g2647fb105.c280
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-r66451.c349
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm69299.c445
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm692e5.c442
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-vtdr6130.c325
-rw-r--r--drivers/gpu/drm/panel/panel-widechips-ws2401.c446
-rw-r--r--drivers/gpu/drm/panel/panel-xinpeng-xpp055c272.c319
-rw-r--r--drivers/gpu/drm/panfrost/Kconfig18
-rw-r--r--drivers/gpu/drm/panfrost/Makefile15
-rw-r--r--drivers/gpu/drm/panfrost/TODO14
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.c288
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.h45
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.c545
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_device.h343
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c1096
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_dump.c241
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_dump.h12
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_features.h130
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.c491
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.h158
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c127
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.c550
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.h27
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_issues.h274
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.c1139
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.h77
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c975
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.h27
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_perfcnt.c353
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_perfcnt.h18
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_regs.h376
-rw-r--r--drivers/gpu/drm/panthor/Kconfig23
-rw-r--r--drivers/gpu/drm/panthor/Makefile16
-rw-r--r--drivers/gpu/drm/panthor/panthor_devfreq.c334
-rw-r--r--drivers/gpu/drm/panthor/panthor_devfreq.h23
-rw-r--r--drivers/gpu/drm/panthor/panthor_device.c612
-rw-r--r--drivers/gpu/drm/panthor/panthor_device.h548
-rw-r--r--drivers/gpu/drm/panthor/panthor_drv.c1765
-rw-r--r--drivers/gpu/drm/panthor/panthor_fw.c1506
-rw-r--r--drivers/gpu/drm/panthor/panthor_fw.h531
-rw-r--r--drivers/gpu/drm/panthor/panthor_gem.c455
-rw-r--r--drivers/gpu/drm/panthor/panthor_gem.h202
-rw-r--r--drivers/gpu/drm/panthor/panthor_gpu.c396
-rw-r--r--drivers/gpu/drm/panthor/panthor_gpu.h55
-rw-r--r--drivers/gpu/drm/panthor/panthor_heap.c632
-rw-r--r--drivers/gpu/drm/panthor/panthor_heap.h41
-rw-r--r--drivers/gpu/drm/panthor/panthor_hw.c224
-rw-r--r--drivers/gpu/drm/panthor/panthor_hw.h56
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.c2833
-rw-r--r--drivers/gpu/drm/panthor/panthor_mmu.h106
-rw-r--r--drivers/gpu/drm/panthor/panthor_pwr.c549
-rw-r--r--drivers/gpu/drm/panthor/panthor_pwr.h23
-rw-r--r--drivers/gpu/drm/panthor/panthor_regs.h291
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.c4149
-rw-r--r--drivers/gpu/drm/panthor/panthor_sched.h57
-rw-r--r--drivers/gpu/drm/pl111/Kconfig16
-rw-r--r--drivers/gpu/drm/pl111/Makefile9
-rw-r--r--drivers/gpu/drm/pl111/pl111_debugfs.c59
-rw-r--r--drivers/gpu/drm/pl111/pl111_display.c600
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm.h162
-rw-r--r--drivers/gpu/drm/pl111/pl111_drv.c451
-rw-r--r--drivers/gpu/drm/pl111/pl111_nomadik.c36
-rw-r--r--drivers/gpu/drm/pl111/pl111_nomadik.h19
-rw-r--r--drivers/gpu/drm/pl111/pl111_versatile.c565
-rw-r--r--drivers/gpu/drm/pl111/pl111_versatile.h12
-rw-r--r--drivers/gpu/drm/qxl/Kconfig14
-rw-r--r--drivers/gpu/drm/qxl/Makefile8
-rw-r--r--drivers/gpu/drm/qxl/qxl_cmd.c662
-rw-r--r--drivers/gpu/drm/qxl/qxl_debugfs.c127
-rw-r--r--drivers/gpu/drm/qxl/qxl_dev.h876
-rw-r--r--drivers/gpu/drm/qxl/qxl_display.c1321
-rw-r--r--drivers/gpu/drm/qxl/qxl_draw.c268
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.c315
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.h454
-rw-r--r--drivers/gpu/drm/qxl/qxl_dumb.c76
-rw-r--r--drivers/gpu/drm/qxl/qxl_gem.c132
-rw-r--r--drivers/gpu/drm/qxl/qxl_image.c242
-rw-r--r--drivers/gpu/drm/qxl/qxl_ioctl.c415
-rw-r--r--drivers/gpu/drm/qxl/qxl_irq.c107
-rw-r--r--drivers/gpu/drm/qxl/qxl_kms.c323
-rw-r--r--drivers/gpu/drm/qxl/qxl_object.c418
-rw-r--r--drivers/gpu/drm/qxl/qxl_object.h77
-rw-r--r--drivers/gpu/drm/qxl/qxl_prime.c75
-rw-r--r--drivers/gpu/drm/qxl/qxl_release.c443
-rw-r--r--drivers/gpu/drm/qxl/qxl_ttm.c245
-rw-r--r--drivers/gpu/drm/r128/Makefile10
-rw-r--r--drivers/gpu/drm/r128/r128_cce.c937
-rw-r--r--drivers/gpu/drm/r128/r128_drv.c111
-rw-r--r--drivers/gpu/drm/r128/r128_drv.h530
-rw-r--r--drivers/gpu/drm/r128/r128_ioc32.c215
-rw-r--r--drivers/gpu/drm/r128/r128_irq.c116
-rw-r--r--drivers/gpu/drm/r128/r128_state.c1667
-rw-r--r--drivers/gpu/drm/radeon/.gitignore1
-rw-r--r--drivers/gpu/drm/radeon/Kconfig74
-rw-r--r--drivers/gpu/drm/radeon/Makefile89
-rw-r--r--drivers/gpu/drm/radeon/ObjectID.h45
-rw-r--r--drivers/gpu/drm/radeon/atom-bits.h2
-rw-r--r--drivers/gpu/drm/radeon/atom.c192
-rw-r--r--drivers/gpu/drm/radeon/atom.h15
-rw-r--r--drivers/gpu/drm/radeon/atombios.h1984
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c1463
-rw-r--r--drivers/gpu/drm/radeon/atombios_dp.c1148
-rw-r--r--drivers/gpu/drm/radeon/atombios_encoders.c2788
-rw-r--r--drivers/gpu/drm/radeon/atombios_i2c.c154
-rw-r--r--drivers/gpu/drm/radeon/btc_dpm.c2797
-rw-r--r--drivers/gpu/drm/radeon/btc_dpm.h62
-rw-r--r--drivers/gpu/drm/radeon/btcd.h185
-rw-r--r--drivers/gpu/drm/radeon/cayman_blit_shaders.c55
-rw-r--r--drivers/gpu/drm/radeon/cayman_blit_shaders.h291
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.c5963
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.h339
-rw-r--r--drivers/gpu/drm/radeon/ci_smc.c274
-rw-r--r--drivers/gpu/drm/radeon/cik.c9807
-rw-r--r--drivers/gpu/drm/radeon/cik.h40
-rw-r--r--drivers/gpu/drm/radeon/cik_blit_shaders.h247
-rw-r--r--drivers/gpu/drm/radeon/cik_reg.h241
-rw-r--r--drivers/gpu/drm/radeon/cik_sdma.c998
-rw-r--r--drivers/gpu/drm/radeon/cikd.h2172
-rw-r--r--drivers/gpu/drm/radeon/clearstate_cayman.h1080
-rw-r--r--drivers/gpu/drm/radeon/clearstate_ci.h945
-rw-r--r--drivers/gpu/drm/radeon/clearstate_defs.h44
-rw-r--r--drivers/gpu/drm/radeon/clearstate_evergreen.h1080
-rw-r--r--drivers/gpu/drm/radeon/clearstate_si.h942
-rw-r--r--drivers/gpu/drm/radeon/cypress_dpm.c2168
-rw-r--r--drivers/gpu/drm/radeon/cypress_dpm.h160
-rw-r--r--drivers/gpu/drm/radeon/dce3_1_afmt.c233
-rw-r--r--drivers/gpu/drm/radeon/dce6_afmt.c320
-rw-r--r--drivers/gpu/drm/radeon/dce6_afmt.h52
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c5166
-rw-r--r--drivers/gpu/drm/radeon/evergreen.h52
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c902
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_shaders.c356
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_shaders.h278
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c3227
-rw-r--r--drivers/gpu/drm/radeon/evergreen_dma.c181
-rw-r--r--drivers/gpu/drm/radeon/evergreen_hdmi.c490
-rw-r--r--drivers/gpu/drm/radeon/evergreen_hdmi.h69
-rw-r--r--drivers/gpu/drm/radeon/evergreen_reg.h123
-rw-r--r--drivers/gpu/drm/radeon/evergreen_smc.h64
-rw-r--r--drivers/gpu/drm/radeon/evergreend.h1544
-rw-r--r--drivers/gpu/drm/radeon/kv_dpm.c2696
-rw-r--r--drivers/gpu/drm/radeon/kv_dpm.h200
-rw-r--r--drivers/gpu/drm/radeon/kv_smc.c214
-rw-r--r--drivers/gpu/drm/radeon/mkregtable.c453
-rw-r--r--drivers/gpu/drm/radeon/ni.c2497
-rw-r--r--drivers/gpu/drm/radeon/ni.h37
-rw-r--r--drivers/gpu/drm/radeon/ni_dma.c471
-rw-r--r--drivers/gpu/drm/radeon/ni_dpm.c4382
-rw-r--r--drivers/gpu/drm/radeon/ni_dpm.h249
-rw-r--r--drivers/gpu/drm/radeon/ni_reg.h46
-rw-r--r--drivers/gpu/drm/radeon/nid.h861
-rw-r--r--drivers/gpu/drm/radeon/nislands_smc.h320
-rw-r--r--drivers/gpu/drm/radeon/ppsmc.h191
-rw-r--r--drivers/gpu/drm/radeon/pptable.h690
-rw-r--r--drivers/gpu/drm/radeon/r100.c2377
-rw-r--r--drivers/gpu/drm/radeon/r100_track.h113
-rw-r--r--drivers/gpu/drm/radeon/r100d.h11
-rw-r--r--drivers/gpu/drm/radeon/r200.c157
-rw-r--r--drivers/gpu/drm/radeon/r300.c552
-rw-r--r--drivers/gpu/drm/radeon/r300_cmdbuf.c1185
-rw-r--r--drivers/gpu/drm/radeon/r300_reg.h6
-rw-r--r--drivers/gpu/drm/radeon/r300d.h11
-rw-r--r--drivers/gpu/drm/radeon/r420.c137
-rw-r--r--drivers/gpu/drm/radeon/r500_reg.h8
-rw-r--r--drivers/gpu/drm/radeon/r520.c56
-rw-r--r--drivers/gpu/drm/radeon/r600.c2617
-rw-r--r--drivers/gpu/drm/radeon/r600.h58
-rw-r--r--drivers/gpu/drm/radeon/r600_audio.c282
-rw-r--r--drivers/gpu/drm/radeon/r600_blit.c868
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c840
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_shaders.c710
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_shaders.h38
-rw-r--r--drivers/gpu/drm/radeon/r600_cp.c2667
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c1955
-rw-r--r--drivers/gpu/drm/radeon/r600_dma.c495
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.c1370
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.h237
-rw-r--r--drivers/gpu/drm/radeon/r600_hdmi.c750
-rw-r--r--drivers/gpu/drm/radeon/r600_reg.h91
-rw-r--r--drivers/gpu/drm/radeon/r600d.h895
-rw-r--r--drivers/gpu/drm/radeon/radeon.h2371
-rw-r--r--drivers/gpu/drm/radeon/radeon_acpi.c778
-rw-r--r--drivers/gpu/drm/radeon/radeon_acpi.h465
-rw-r--r--drivers/gpu/drm/radeon/radeon_agp.c145
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c2980
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h738
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c2185
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.h45
-rw-r--r--drivers/gpu/drm/radeon/radeon_atpx_handler.c588
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.c848
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.h96
-rw-r--r--drivers/gpu/drm/radeon/radeon_benchmark.c245
-rw-r--r--drivers/gpu/drm/radeon/radeon_bios.c276
-rw-r--r--drivers/gpu/drm/radeon/radeon_clocks.c56
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c687
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c1862
-rw-r--r--drivers/gpu/drm/radeon/radeon_cp.c2257
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c915
-rw-r--r--drivers/gpu/drm/radeon/radeon_cursor.c352
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c1363
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.h32
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c1435
-rw-r--r--drivers/gpu/drm/radeon/radeon_dp_auxch.c203
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c751
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.h2059
-rw-r--r--drivers/gpu/drm/radeon/radeon_encoders.c2017
-rw-r--r--drivers/gpu/drm/radeon/radeon_family.h14
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c419
-rw-r--r--drivers/gpu/drm/radeon/radeon_fbdev.c293
-rw-r--r--drivers/gpu/drm/radeon/radeon_fence.c1190
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c329
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c752
-rw-r--r--drivers/gpu/drm/radeon/radeon_i2c.c198
-rw-r--r--drivers/gpu/drm/radeon/radeon_ib.c317
-rw-r--r--drivers/gpu/drm/radeon/radeon_ioc32.c425
-rw-r--r--drivers/gpu/drm/radeon/radeon_irq.c401
-rw-r--r--drivers/gpu/drm/radeon/radeon_irq_kms.c533
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c847
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.h35
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c138
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_encoders.c468
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_encoders.h36
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_tv.c32
-rw-r--r--drivers/gpu/drm/radeon/radeon_mem.c302
-rw-r--r--drivers/gpu/drm/radeon/radeon_mn.c128
-rw-r--r--drivers/gpu/drm/radeon/radeon_mode.h438
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c589
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.h128
-rw-r--r--drivers/gpu/drm/radeon/radeon_pm.c1511
-rw-r--r--drivers/gpu/drm/radeon/radeon_pm.h29
-rw-r--r--drivers/gpu/drm/radeon/radeon_prime.c101
-rw-r--r--drivers/gpu/drm/radeon/radeon_prime.h40
-rw-r--r--drivers/gpu/drm/radeon/radeon_reg.h23
-rw-r--r--drivers/gpu/drm/radeon/radeon_ring.c737
-rw-r--r--drivers/gpu/drm/radeon/radeon_sa.c160
-rw-r--r--drivers/gpu/drm/radeon/radeon_semaphore.c106
-rw-r--r--drivers/gpu/drm/radeon/radeon_state.c3261
-rw-r--r--drivers/gpu/drm/radeon/radeon_sync.c204
-rw-r--r--drivers/gpu/drm/radeon/radeon_test.c496
-rw-r--r--drivers/gpu/drm/radeon/radeon_trace.h165
-rw-r--r--drivers/gpu/drm/radeon/radeon_trace_points.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c1173
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.h36
-rw-r--r--drivers/gpu/drm/radeon/radeon_ucode.c167
-rw-r--r--drivers/gpu/drm/radeon/radeon_ucode.h227
-rw-r--r--drivers/gpu/drm/radeon/radeon_uvd.c1041
-rw-r--r--drivers/gpu/drm/radeon/radeon_vce.c826
-rw-r--r--drivers/gpu/drm/radeon/radeon_vm.c1268
-rw-r--r--drivers/gpu/drm/radeon/reg_srcs/cayman32
-rw-r--r--drivers/gpu/drm/radeon/reg_srcs/evergreen32
-rw-r--r--drivers/gpu/drm/radeon/reg_srcs/r60033
-rw-r--r--drivers/gpu/drm/radeon/reg_srcs/rv5152
-rw-r--r--drivers/gpu/drm/radeon/rs400.c165
-rw-r--r--drivers/gpu/drm/radeon/rs600.c442
-rw-r--r--drivers/gpu/drm/radeon/rs600d.h14
-rw-r--r--drivers/gpu/drm/radeon/rs690.c430
-rw-r--r--drivers/gpu/drm/radeon/rs690d.h3
-rw-r--r--drivers/gpu/drm/radeon/rs780_dpm.c1077
-rw-r--r--drivers/gpu/drm/radeon/rs780_dpm.h109
-rw-r--r--drivers/gpu/drm/radeon/rs780d.h171
-rw-r--r--drivers/gpu/drm/radeon/rv515.c648
-rw-r--r--drivers/gpu/drm/radeon/rv515d.h11
-rw-r--r--drivers/gpu/drm/radeon/rv6xx_dpm.c2159
-rw-r--r--drivers/gpu/drm/radeon/rv6xx_dpm.h94
-rw-r--r--drivers/gpu/drm/radeon/rv6xxd.h246
-rw-r--r--drivers/gpu/drm/radeon/rv730_dpm.c505
-rw-r--r--drivers/gpu/drm/radeon/rv730d.h165
-rw-r--r--drivers/gpu/drm/radeon/rv740_dpm.c418
-rw-r--r--drivers/gpu/drm/radeon/rv740d.h117
-rw-r--r--drivers/gpu/drm/radeon/rv770.c1504
-rw-r--r--drivers/gpu/drm/radeon/rv770.h39
-rw-r--r--drivers/gpu/drm/radeon/rv770_dma.c94
-rw-r--r--drivers/gpu/drm/radeon/rv770_dpm.c2588
-rw-r--r--drivers/gpu/drm/radeon/rv770_dpm.h285
-rw-r--r--drivers/gpu/drm/radeon/rv770_smc.c619
-rw-r--r--drivers/gpu/drm/radeon/rv770_smc.h198
-rw-r--r--drivers/gpu/drm/radeon/rv770d.h620
-rw-r--r--drivers/gpu/drm/radeon/si.c7548
-rw-r--r--drivers/gpu/drm/radeon/si.h38
-rw-r--r--drivers/gpu/drm/radeon/si_blit_shaders.h251
-rw-r--r--drivers/gpu/drm/radeon/si_dma.c283
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c7083
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.h236
-rw-r--r--drivers/gpu/drm/radeon/si_reg.h105
-rw-r--r--drivers/gpu/drm/radeon/si_smc.c310
-rw-r--r--drivers/gpu/drm/radeon/sid.h1956
-rw-r--r--drivers/gpu/drm/radeon/sislands_smc.h415
-rw-r--r--drivers/gpu/drm/radeon/smu7.h168
-rw-r--r--drivers/gpu/drm/radeon/smu7_discrete.h497
-rw-r--r--drivers/gpu/drm/radeon/smu7_fusion.h290
-rw-r--r--drivers/gpu/drm/radeon/sumo_dpm.c1967
-rw-r--r--drivers/gpu/drm/radeon/sumo_dpm.h222
-rw-r--r--drivers/gpu/drm/radeon/sumo_smc.c218
-rw-r--r--drivers/gpu/drm/radeon/sumod.h372
-rw-r--r--drivers/gpu/drm/radeon/trinity_dpm.c2060
-rw-r--r--drivers/gpu/drm/radeon/trinity_dpm.h133
-rw-r--r--drivers/gpu/drm/radeon/trinity_smc.c126
-rw-r--r--drivers/gpu/drm/radeon/trinityd.h228
-rw-r--r--drivers/gpu/drm/radeon/uvd_v1_0.c542
-rw-r--r--drivers/gpu/drm/radeon/uvd_v2_2.c199
-rw-r--r--drivers/gpu/drm/radeon/uvd_v3_1.c56
-rw-r--r--drivers/gpu/drm/radeon/uvd_v4_2.c78
-rw-r--r--drivers/gpu/drm/radeon/vce.h35
-rw-r--r--drivers/gpu/drm/radeon/vce_v1_0.c384
-rw-r--r--drivers/gpu/drm/radeon/vce_v2_0.c199
-rw-r--r--drivers/gpu/drm/renesas/Kconfig5
-rw-r--r--drivers/gpu/drm/renesas/Makefile5
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/Kconfig77
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/Makefile16
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_cmm.c210
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_cmm.h58
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.c1339
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_crtc.h103
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.c763
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.h152
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_encoder.c137
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_encoder.h29
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_group.c389
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_group.h65
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c1012
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.h44
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_plane.c842
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_plane.h86
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_regs.h553
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_vsp.c537
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_vsp.h93
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_writeback.c246
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_du_writeback.h39
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_dw_hdmi.c122
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds.c1033
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds.h41
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_lvds_regs.h111
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c1350
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.h31
-rw-r--r--drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h319
-rw-r--r--drivers/gpu/drm/renesas/rz-du/Kconfig28
-rw-r--r--drivers/gpu/drm/renesas/rz-du/Makefile10
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.c422
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.h89
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.c199
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.h78
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.c126
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.h32
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c479
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.h42
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.c355
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_du_vsp.h82
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi.c1084
-rw-r--r--drivers/gpu/drm/renesas/rz-du/rzg2l_mipi_dsi_regs.h203
-rw-r--r--drivers/gpu/drm/renesas/shmobile/Kconfig16
-rw-r--r--drivers/gpu/drm/renesas/shmobile/Makefile7
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.c622
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_crtc.h46
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c297
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.h52
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c189
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.h32
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.c328
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_plane.h20
-rw-r--r--drivers/gpu/drm/renesas/shmobile/shmob_drm_regs.h310
-rw-r--r--drivers/gpu/drm/rockchip/Kconfig148
-rw-r--r--drivers/gpu/drm/rockchip/Makefile23
-rw-r--r--drivers/gpu/drm/rockchip/analogix_dp-rockchip.c590
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-core.c1183
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-core.h105
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-reg.c962
-rw-r--r--drivers/gpu/drm/rockchip/cdn-dp-reg.h474
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c1739
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c508
-rw-r--r--drivers/gpu/drm/rockchip/dw_dp-rockchip.c150
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c659
-rw-r--r--drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c669
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.c1370
-rw-r--r--drivers/gpu/drm/rockchip/rk3066_hdmi.c844
-rw-r--r--drivers/gpu/drm/rockchip/rk3066_hdmi.h229
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.c579
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.h107
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fb.c85
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fb.h11
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_gem.c552
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_gem.h47
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.c2297
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.h426
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.c2811
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop2.h866
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_lvds.c723
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_lvds.h124
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_rgb.c188
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_rgb.h26
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop2_reg.c2612
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop_reg.c1293
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop_reg.h1036
-rw-r--r--drivers/gpu/drm/savage/Makefile9
-rw-r--r--drivers/gpu/drm/savage/savage_bci.c1091
-rw-r--r--drivers/gpu/drm/savage/savage_drv.c87
-rw-r--r--drivers/gpu/drm/savage/savage_drv.h575
-rw-r--r--drivers/gpu/drm/savage/savage_state.c1162
-rw-r--r--drivers/gpu/drm/scheduler/.kunitconfig12
-rw-r--r--drivers/gpu/drm/scheduler/Makefile27
-rw-r--r--drivers/gpu/drm/scheduler/gpu_scheduler_trace.h159
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c631
-rw-r--r--drivers/gpu/drm/scheduler/sched_fence.c242
-rw-r--r--drivers/gpu/drm/scheduler/sched_internal.h91
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c1569
-rw-r--r--drivers/gpu/drm/scheduler/tests/Makefile7
-rw-r--r--drivers/gpu/drm/scheduler/tests/mock_scheduler.c370
-rw-r--r--drivers/gpu/drm/scheduler/tests/sched_tests.h224
-rw-r--r--drivers/gpu/drm/scheduler/tests/tests_basic.c563
-rw-r--r--drivers/gpu/drm/sis/Makefile10
-rw-r--r--drivers/gpu/drm/sis/sis_drv.c115
-rw-r--r--drivers/gpu/drm/sis/sis_drv.h73
-rw-r--r--drivers/gpu/drm/sis/sis_mm.c331
-rw-r--r--drivers/gpu/drm/sitronix/Kconfig42
-rw-r--r--drivers/gpu/drm/sitronix/Makefile3
-rw-r--r--drivers/gpu/drm/sitronix/st7571-i2c.c1083
-rw-r--r--drivers/gpu/drm/sitronix/st7586.c408
-rw-r--r--drivers/gpu/drm/sitronix/st7735r.c278
-rw-r--r--drivers/gpu/drm/solomon/Kconfig32
-rw-r--r--drivers/gpu/drm/solomon/Makefile3
-rw-r--r--drivers/gpu/drm/solomon/ssd130x-i2c.c126
-rw-r--r--drivers/gpu/drm/solomon/ssd130x-spi.c193
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.c2040
-rw-r--r--drivers/gpu/drm/solomon/ssd130x.h113
-rw-r--r--drivers/gpu/drm/sprd/Kconfig12
-rw-r--r--drivers/gpu/drm/sprd/Makefile8
-rw-r--r--drivers/gpu/drm/sprd/megacores_pll.c305
-rw-r--r--drivers/gpu/drm/sprd/sprd_dpu.c872
-rw-r--r--drivers/gpu/drm/sprd/sprd_dpu.h109
-rw-r--r--drivers/gpu/drm/sprd/sprd_drm.c199
-rw-r--r--drivers/gpu/drm/sprd/sprd_drm.h19
-rw-r--r--drivers/gpu/drm/sprd/sprd_dsi.c1066
-rw-r--r--drivers/gpu/drm/sprd/sprd_dsi.h126
-rw-r--r--drivers/gpu/drm/sti/Kconfig13
-rw-r--r--drivers/gpu/drm/sti/Makefile20
-rw-r--r--drivers/gpu/drm/sti/NOTES58
-rw-r--r--drivers/gpu/drm/sti/sti_awg_utils.c192
-rw-r--r--drivers/gpu/drm/sti/sti_awg_utils.h34
-rw-r--r--drivers/gpu/drm/sti/sti_compositor.c269
-rw-r--r--drivers/gpu/drm/sti/sti_compositor.h85
-rw-r--r--drivers/gpu/drm/sti/sti_crtc.c368
-rw-r--r--drivers/gpu/drm/sti/sti_crtc.h22
-rw-r--r--drivers/gpu/drm/sti/sti_cursor.c420
-rw-r--r--drivers/gpu/drm/sti/sti_cursor.h18
-rw-r--r--drivers/gpu/drm/sti/sti_drv.c300
-rw-r--r--drivers/gpu/drm/sti/sti_drv.h38
-rw-r--r--drivers/gpu/drm/sti/sti_dvo.c578
-rw-r--r--drivers/gpu/drm/sti/sti_gdp.c964
-rw-r--r--drivers/gpu/drm/sti/sti_gdp.h24
-rw-r--r--drivers/gpu/drm/sti/sti_hda.c815
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi.c1493
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi.h114
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi_tx3g4c28phy.c213
-rw-r--r--drivers/gpu/drm/sti/sti_hdmi_tx3g4c28phy.h14
-rw-r--r--drivers/gpu/drm/sti/sti_hqvdp.c1421
-rw-r--r--drivers/gpu/drm/sti/sti_hqvdp_lut.h373
-rw-r--r--drivers/gpu/drm/sti/sti_mixer.c378
-rw-r--r--drivers/gpu/drm/sti/sti_mixer.h69
-rw-r--r--drivers/gpu/drm/sti/sti_plane.c140
-rw-r--r--drivers/gpu/drm/sti/sti_plane.h83
-rw-r--r--drivers/gpu/drm/sti/sti_tvout.c889
-rw-r--r--drivers/gpu/drm/sti/sti_vid.c225
-rw-r--r--drivers/gpu/drm/sti/sti_vid.h31
-rw-r--r--drivers/gpu/drm/sti/sti_vtg.c438
-rw-r--r--drivers/gpu/drm/sti/sti_vtg.h34
-rw-r--r--drivers/gpu/drm/stm/Kconfig35
-rw-r--r--drivers/gpu/drm/stm/Makefile10
-rw-r--r--drivers/gpu/drm/stm/drv.c274
-rw-r--r--drivers/gpu/drm/stm/dw_mipi_dsi-stm.c801
-rw-r--r--drivers/gpu/drm/stm/ltdc.c2114
-rw-r--r--drivers/gpu/drm/stm/ltdc.h71
-rw-r--r--drivers/gpu/drm/stm/lvds.c1222
-rw-r--r--drivers/gpu/drm/sun4i/Kconfig88
-rw-r--r--drivers/gpu/drm/sun4i/Makefile40
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_backend.c1042
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_backend.h211
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_crtc.c256
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_crtc.h29
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_drv.c459
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_drv.h22
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_framebuffer.c52
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_framebuffer.h14
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_frontend.c732
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_frontend.h151
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi.h297
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c144
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c726
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c316
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c236
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_layer.c262
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_layer.h42
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_lvds.c156
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_lvds.h12
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_rgb.c248
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_rgb.h14
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c1581
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.h303
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon_dclk.c208
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon_dclk.h17
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tv.c572
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_drc.c125
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c1257
-rw-r--r--drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h58
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_csc.c248
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_csc.h29
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c302
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h210
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c738
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_hdmi_phy_clk.c178
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.c944
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_mixer.h274
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_tcon_top.c312
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_tcon_top.h44
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_layer.c308
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_layer.h58
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_scaler.c181
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_ui_scaler.h43
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_layer.c490
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_layer.h63
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_scaler.c1002
-rw-r--r--drivers/gpu/drm/sun4i/sun8i_vi_scaler.h80
-rw-r--r--drivers/gpu/drm/sun4i/sunxi_engine.h218
-rw-r--r--drivers/gpu/drm/sysfb/Kconfig76
-rw-r--r--drivers/gpu/drm/sysfb/Makefile12
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb.c35
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_helper.h218
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_modeset.c608
-rw-r--r--drivers/gpu/drm/sysfb/drm_sysfb_screen_info.c104
-rw-r--r--drivers/gpu/drm/sysfb/efidrm.c390
-rw-r--r--drivers/gpu/drm/sysfb/ofdrm.c1145
-rw-r--r--drivers/gpu/drm/sysfb/simpledrm.c885
-rw-r--r--drivers/gpu/drm/sysfb/vesadrm.c652
-rw-r--r--drivers/gpu/drm/tdfx/Makefile8
-rw-r--r--drivers/gpu/drm/tdfx/tdfx_drv.c84
-rw-r--r--drivers/gpu/drm/tdfx/tdfx_drv.h47
-rw-r--r--drivers/gpu/drm/tegra/Kconfig46
-rw-r--r--drivers/gpu/drm/tegra/Makefile35
-rw-r--r--drivers/gpu/drm/tegra/dc.c3300
-rw-r--r--drivers/gpu/drm/tegra/dc.h840
-rw-r--r--drivers/gpu/drm/tegra/dp.c818
-rw-r--r--drivers/gpu/drm/tegra/dp.h175
-rw-r--r--drivers/gpu/drm/tegra/dpaux.c822
-rw-r--r--drivers/gpu/drm/tegra/dpaux.h73
-rw-r--r--drivers/gpu/drm/tegra/drm.c1461
-rw-r--r--drivers/gpu/drm/tegra/drm.h219
-rw-r--r--drivers/gpu/drm/tegra/dsi.c1708
-rw-r--r--drivers/gpu/drm/tegra/dsi.h143
-rw-r--r--drivers/gpu/drm/tegra/falcon.c249
-rw-r--r--drivers/gpu/drm/tegra/falcon.h116
-rw-r--r--drivers/gpu/drm/tegra/fb.c184
-rw-r--r--drivers/gpu/drm/tegra/fbdev.c143
-rw-r--r--drivers/gpu/drm/tegra/firewall.c257
-rw-r--r--drivers/gpu/drm/tegra/gem.c805
-rw-r--r--drivers/gpu/drm/tegra/gem.h106
-rw-r--r--drivers/gpu/drm/tegra/gr2d.c398
-rw-r--r--drivers/gpu/drm/tegra/gr2d.h26
-rw-r--r--drivers/gpu/drm/tegra/gr3d.c609
-rw-r--r--drivers/gpu/drm/tegra/gr3d.h24
-rw-r--r--drivers/gpu/drm/tegra/hda.c63
-rw-r--r--drivers/gpu/drm/tegra/hda.h20
-rw-r--r--drivers/gpu/drm/tegra/hdmi.c1924
-rw-r--r--drivers/gpu/drm/tegra/hdmi.h557
-rw-r--r--drivers/gpu/drm/tegra/hub.c1223
-rw-r--r--drivers/gpu/drm/tegra/hub.h98
-rw-r--r--drivers/gpu/drm/tegra/mipi-phy.c134
-rw-r--r--drivers/gpu/drm/tegra/mipi-phy.h48
-rw-r--r--drivers/gpu/drm/tegra/nvdec.c578
-rw-r--r--drivers/gpu/drm/tegra/nvjpg.c330
-rw-r--r--drivers/gpu/drm/tegra/output.c282
-rw-r--r--drivers/gpu/drm/tegra/plane.c794
-rw-r--r--drivers/gpu/drm/tegra/plane.h98
-rw-r--r--drivers/gpu/drm/tegra/rgb.c384
-rw-r--r--drivers/gpu/drm/tegra/riscv.c106
-rw-r--r--drivers/gpu/drm/tegra/riscv.h30
-rw-r--r--drivers/gpu/drm/tegra/sor.c4045
-rw-r--r--drivers/gpu/drm/tegra/sor.h457
-rw-r--r--drivers/gpu/drm/tegra/submit.c684
-rw-r--r--drivers/gpu/drm/tegra/submit.h21
-rw-r--r--drivers/gpu/drm/tegra/trace.c2
-rw-r--r--drivers/gpu/drm/tegra/trace.h68
-rw-r--r--drivers/gpu/drm/tegra/uapi.c362
-rw-r--r--drivers/gpu/drm/tegra/uapi.h57
-rw-r--r--drivers/gpu/drm/tegra/vic.c573
-rw-r--r--drivers/gpu/drm/tegra/vic.h36
-rw-r--r--drivers/gpu/drm/tests/.kunitconfig5
-rw-r--r--drivers/gpu/drm/tests/Makefile30
-rw-r--r--drivers/gpu/drm/tests/drm_atomic_state_test.c379
-rw-r--r--drivers/gpu/drm/tests/drm_atomic_test.c153
-rw-r--r--drivers/gpu/drm/tests/drm_bridge_test.c521
-rw-r--r--drivers/gpu/drm/tests/drm_buddy_test.c893
-rw-r--r--drivers/gpu/drm/tests/drm_client_modeset_test.c200
-rw-r--r--drivers/gpu/drm/tests/drm_cmdline_parser_test.c1079
-rw-r--r--drivers/gpu/drm/tests/drm_connector_test.c1818
-rw-r--r--drivers/gpu/drm/tests/drm_damage_helper_test.c640
-rw-r--r--drivers/gpu/drm/tests/drm_dp_mst_helper_test.c577
-rw-r--r--drivers/gpu/drm/tests/drm_exec_test.c222
-rw-r--r--drivers/gpu/drm/tests/drm_fixp_test.c71
-rw-r--r--drivers/gpu/drm/tests/drm_format_helper_test.c1740
-rw-r--r--drivers/gpu/drm/tests/drm_format_test.c360
-rw-r--r--drivers/gpu/drm/tests/drm_framebuffer_test.c725
-rw-r--r--drivers/gpu/drm/tests/drm_gem_shmem_test.c385
-rw-r--r--drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c2334
-rw-r--r--drivers/gpu/drm/tests/drm_kunit_edid.h864
-rw-r--r--drivers/gpu/drm/tests/drm_kunit_helpers.c400
-rw-r--r--drivers/gpu/drm/tests/drm_managed_test.c117
-rw-r--r--drivers/gpu/drm/tests/drm_mm_test.c361
-rw-r--r--drivers/gpu/drm/tests/drm_modes_test.c208
-rw-r--r--drivers/gpu/drm/tests/drm_plane_helper_test.c319
-rw-r--r--drivers/gpu/drm/tests/drm_probe_helper_test.c217
-rw-r--r--drivers/gpu/drm/tests/drm_rect_test.c530
-rw-r--r--drivers/gpu/drm/tests/drm_sysfb_modeset_test.c168
-rw-r--r--drivers/gpu/drm/tidss/Kconfig16
-rw-r--r--drivers/gpu/drm/tidss/Makefile13
-rw-r--r--drivers/gpu/drm/tidss/tidss_crtc.c459
-rw-r--r--drivers/gpu/drm/tidss/tidss_crtc.h48
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc.c3074
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc.h151
-rw-r--r--drivers/gpu/drm/tidss/tidss_dispc_regs.h336
-rw-r--r--drivers/gpu/drm/tidss/tidss_drv.c271
-rw-r--r--drivers/gpu/drm/tidss/tidss_drv.h51
-rw-r--r--drivers/gpu/drm/tidss/tidss_encoder.c135
-rw-r--r--drivers/gpu/drm/tidss/tidss_encoder.h18
-rw-r--r--drivers/gpu/drm/tidss/tidss_irq.c138
-rw-r--r--drivers/gpu/drm/tidss/tidss_irq.h73
-rw-r--r--drivers/gpu/drm/tidss/tidss_kms.c271
-rw-r--r--drivers/gpu/drm/tidss/tidss_kms.h14
-rw-r--r--drivers/gpu/drm/tidss/tidss_oldi.c619
-rw-r--r--drivers/gpu/drm/tidss/tidss_oldi.h43
-rw-r--r--drivers/gpu/drm/tidss/tidss_plane.c252
-rw-r--r--drivers/gpu/drm/tidss/tidss_plane.h29
-rw-r--r--drivers/gpu/drm/tidss/tidss_scale_coefs.c202
-rw-r--r--drivers/gpu/drm/tidss/tidss_scale_coefs.h24
-rw-r--r--drivers/gpu/drm/tilcdc/Kconfig16
-rw-r--r--drivers/gpu/drm/tilcdc/Makefile13
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_crtc.c1069
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_drv.c623
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_drv.h173
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_external.c179
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_external.h14
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_panel.c408
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_panel.h15
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_plane.c119
-rw-r--r--drivers/gpu/drm/tilcdc/tilcdc_regs.h173
-rw-r--r--drivers/gpu/drm/tiny/Kconfig217
-rw-r--r--drivers/gpu/drm/tiny/Makefile17
-rw-r--r--drivers/gpu/drm/tiny/appletbdrm.c834
-rw-r--r--drivers/gpu/drm/tiny/arcpgu.c435
-rw-r--r--drivers/gpu/drm/tiny/bochs.c840
-rw-r--r--drivers/gpu/drm/tiny/cirrus-qemu.c675
-rw-r--r--drivers/gpu/drm/tiny/gm12u320.c747
-rw-r--r--drivers/gpu/drm/tiny/hx8357d.c293
-rw-r--r--drivers/gpu/drm/tiny/ili9163.c222
-rw-r--r--drivers/gpu/drm/tiny/ili9225.c463
-rw-r--r--drivers/gpu/drm/tiny/ili9341.c255
-rw-r--r--drivers/gpu/drm/tiny/ili9486.c284
-rw-r--r--drivers/gpu/drm/tiny/mi0283qt.c280
-rw-r--r--drivers/gpu/drm/tiny/panel-mipi-dbi.c458
-rw-r--r--drivers/gpu/drm/tiny/pixpaper.c1166
-rw-r--r--drivers/gpu/drm/tiny/repaper.c1153
-rw-r--r--drivers/gpu/drm/tiny/sharp-memory.c669
-rw-r--r--drivers/gpu/drm/ttm/Makefile11
-rw-r--r--drivers/gpu/drm/ttm/tests/.kunitconfig3
-rw-r--r--drivers/gpu/drm/ttm/tests/Makefile11
-rw-r--r--drivers/gpu/drm/ttm/tests/TODO27
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_bo_test.c637
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c1176
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_device_test.c206
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.c304
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_kunit_helpers.h52
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_mock_manager.c238
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_mock_manager.h30
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_pool_test.c437
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_resource_test.c337
-rw-r--r--drivers/gpu/drm/ttm/tests/ttm_tt_test.c402
-rw-r--r--drivers/gpu/drm/ttm/ttm_agp_backend.c123
-rw-r--r--drivers/gpu/drm/ttm/ttm_backup.c182
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c2517
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_internal.h60
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_manager.c157
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c1421
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c706
-rw-r--r--drivers/gpu/drm/ttm/ttm_device.c322
-rw-r--r--drivers/gpu/drm/ttm/ttm_execbuf_util.c219
-rw-r--r--drivers/gpu/drm/ttm/ttm_lock.c310
-rw-r--r--drivers/gpu/drm/ttm/ttm_memory.c600
-rw-r--r--drivers/gpu/drm/ttm/ttm_module.c108
-rw-r--r--drivers/gpu/drm/ttm/ttm_module.h43
-rw-r--r--drivers/gpu/drm/ttm/ttm_object.c452
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc.c857
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool.c1377
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool_internal.h25
-rw-r--r--drivers/gpu/drm/ttm/ttm_range_manager.c242
-rw-r--r--drivers/gpu/drm/ttm/ttm_resource.c945
-rw-r--r--drivers/gpu/drm/ttm/ttm_sys_manager.c49
-rw-r--r--drivers/gpu/drm/ttm/ttm_tt.c847
-rw-r--r--drivers/gpu/drm/tve200/Kconfig16
-rw-r--r--drivers/gpu/drm/tve200/Makefile5
-rw-r--r--drivers/gpu/drm/tve200/tve200_display.c358
-rw-r--r--drivers/gpu/drm/tve200/tve200_drm.h128
-rw-r--r--drivers/gpu/drm/tve200/tve200_drv.c276
-rw-r--r--drivers/gpu/drm/tyr/Kconfig19
-rw-r--r--drivers/gpu/drm/tyr/Makefile3
-rw-r--r--drivers/gpu/drm/tyr/driver.rs205
-rw-r--r--drivers/gpu/drm/tyr/file.rs56
-rw-r--r--drivers/gpu/drm/tyr/gem.rs18
-rw-r--r--drivers/gpu/drm/tyr/gpu.rs219
-rw-r--r--drivers/gpu/drm/tyr/regs.rs108
-rw-r--r--drivers/gpu/drm/tyr/tyr.rs22
-rw-r--r--drivers/gpu/drm/udl/Kconfig13
-rw-r--r--drivers/gpu/drm/udl/Makefile10
-rw-r--r--drivers/gpu/drm/udl/udl_drv.c149
-rw-r--r--drivers/gpu/drm/udl/udl_drv.h90
-rw-r--r--drivers/gpu/drm/udl/udl_edid.c81
-rw-r--r--drivers/gpu/drm/udl/udl_edid.h15
-rw-r--r--drivers/gpu/drm/udl/udl_main.c387
-rw-r--r--drivers/gpu/drm/udl/udl_modeset.c544
-rw-r--r--drivers/gpu/drm/udl/udl_proto.h68
-rw-r--r--drivers/gpu/drm/udl/udl_transfer.c217
-rw-r--r--drivers/gpu/drm/v3d/Kconfig13
-rw-r--r--drivers/gpu/drm/v3d/Makefile23
-rw-r--r--drivers/gpu/drm/v3d/v3d_bo.c308
-rw-r--r--drivers/gpu/drm/v3d/v3d_debugfs.c290
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.c494
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h640
-rw-r--r--drivers/gpu/drm/v3d/v3d_fence.c50
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c345
-rw-r--r--drivers/gpu/drm/v3d/v3d_gemfs.c62
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c351
-rw-r--r--drivers/gpu/drm/v3d/v3d_mmu.c142
-rw-r--r--drivers/gpu/drm/v3d/v3d_perfmon.c496
-rw-r--r--drivers/gpu/drm/v3d/v3d_performance_counters.h33
-rw-r--r--drivers/gpu/drm/v3d/v3d_regs.h544
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c934
-rw-r--r--drivers/gpu/drm/v3d/v3d_submit.c1407
-rw-r--r--drivers/gpu/drm/v3d/v3d_sysfs.c66
-rw-r--r--drivers/gpu/drm/v3d/v3d_trace.h354
-rw-r--r--drivers/gpu/drm/v3d/v3d_trace_points.c9
-rw-r--r--drivers/gpu/drm/vboxvideo/Kconfig19
-rw-r--r--drivers/gpu/drm/vboxvideo/Makefile6
-rw-r--r--drivers/gpu/drm/vboxvideo/hgsmi_base.c183
-rw-r--r--drivers/gpu/drm/vboxvideo/hgsmi_ch_setup.h32
-rw-r--r--drivers/gpu/drm/vboxvideo/hgsmi_channels.h34
-rw-r--r--drivers/gpu/drm/vboxvideo/hgsmi_defs.h73
-rw-r--r--drivers/gpu/drm/vboxvideo/modesetting.c127
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_drv.c205
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_drv.h159
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_hgsmi.c95
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_irq.c192
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_main.c175
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_mode.c887
-rw-r--r--drivers/gpu/drm/vboxvideo/vbox_ttm.c34
-rw-r--r--drivers/gpu/drm/vboxvideo/vboxvideo.h440
-rw-r--r--drivers/gpu/drm/vboxvideo/vboxvideo_guest.h59
-rw-r--r--drivers/gpu/drm/vboxvideo/vboxvideo_vbe.h54
-rw-r--r--drivers/gpu/drm/vboxvideo/vbva_base.c214
-rw-r--r--drivers/gpu/drm/vc4/Kconfig57
-rw-r--r--drivers/gpu/drm/vc4/Makefile37
-rw-r--r--drivers/gpu/drm/vc4/tests/.kunitconfig13
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock.c207
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock.h61
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_crtc.c41
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_output.c176
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_mock_plane.c25
-rw-r--r--drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c1140
-rw-r--r--drivers/gpu/drm/vc4/vc4_bo.c1103
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c1542
-rw-r--r--drivers/gpu/drm/vc4/vc4_debugfs.c59
-rw-r--r--drivers/gpu/drm/vc4/vc4_dpi.c404
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.c519
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.h1104
-rw-r--r--drivers/gpu/drm/vc4/vc4_dsi.c1826
-rw-r--r--drivers/gpu/drm/vc4/vc4_fence.c48
-rw-r--r--drivers/gpu/drm/vc4/vc4_gem.c1292
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c3412
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.h248
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi_phy.c1200
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi_regs.h728
-rw-r--r--drivers/gpu/drm/vc4/vc4_hvs.c1807
-rw-r--r--drivers/gpu/drm/vc4/vc4_irq.c359
-rw-r--r--drivers/gpu/drm/vc4/vc4_kms.c1169
-rw-r--r--drivers/gpu/drm/vc4/vc4_packet.h399
-rw-r--r--drivers/gpu/drm/vc4/vc4_perfmon.c258
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c2622
-rw-r--r--drivers/gpu/drm/vc4/vc4_qpu_defines.h279
-rw-r--r--drivers/gpu/drm/vc4/vc4_regs.h1409
-rw-r--r--drivers/gpu/drm/vc4/vc4_render_cl.c666
-rw-r--r--drivers/gpu/drm/vc4/vc4_trace.h155
-rw-r--r--drivers/gpu/drm/vc4/vc4_trace_points.c11
-rw-r--r--drivers/gpu/drm/vc4/vc4_txp.c659
-rw-r--r--drivers/gpu/drm/vc4/vc4_v3d.c545
-rw-r--r--drivers/gpu/drm/vc4/vc4_validate.c946
-rw-r--r--drivers/gpu/drm/vc4/vc4_validate_shaders.c956
-rw-r--r--drivers/gpu/drm/vc4/vc4_vec.c857
-rw-r--r--drivers/gpu/drm/vgem/Makefile4
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.c181
-rw-r--r--drivers/gpu/drm/vgem/vgem_drv.h51
-rw-r--r--drivers/gpu/drm/vgem/vgem_fence.c238
-rw-r--r--drivers/gpu/drm/via/Makefile8
-rw-r--r--drivers/gpu/drm/via/via_3d_reg.h1650
-rw-r--r--drivers/gpu/drm/via/via_dma.c741
-rw-r--r--drivers/gpu/drm/via/via_dmablit.c808
-rw-r--r--drivers/gpu/drm/via/via_dmablit.h140
-rw-r--r--drivers/gpu/drm/via/via_drv.c96
-rw-r--r--drivers/gpu/drm/via/via_drv.h157
-rw-r--r--drivers/gpu/drm/via/via_irq.c392
-rw-r--r--drivers/gpu/drm/via/via_map.c132
-rw-r--r--drivers/gpu/drm/via/via_mm.c192
-rw-r--r--drivers/gpu/drm/via/via_verifier.c1111
-rw-r--r--drivers/gpu/drm/via/via_verifier.h62
-rw-r--r--drivers/gpu/drm/via/via_video.c93
-rw-r--r--drivers/gpu/drm/virtio/Kconfig25
-rw-r--r--drivers/gpu/drm/virtio/Makefile11
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_debugfs.c112
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_display.c401
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.c258
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.h515
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_fence.c159
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_gem.c298
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_ioctl.c735
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_kms.c353
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_object.c275
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_plane.c608
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_prime.c349
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_submit.c542
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_trace.h56
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_trace_points.c5
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vq.c1496
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_vram.c231
-rw-r--r--drivers/gpu/drm/vkms/Kconfig32
-rw-r--r--drivers/gpu/drm/vkms/Makefile17
-rw-r--r--drivers/gpu/drm/vkms/tests/.kunitconfig4
-rw-r--r--drivers/gpu/drm/vkms/tests/Makefile8
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_color_test.c414
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_config_test.c1029
-rw-r--r--drivers/gpu/drm/vkms/tests/vkms_format_test.c279
-rw-r--r--drivers/gpu/drm/vkms/vkms_colorop.c120
-rw-r--r--drivers/gpu/drm/vkms/vkms_composer.c728
-rw-r--r--drivers/gpu/drm/vkms/vkms_composer.h28
-rw-r--r--drivers/gpu/drm/vkms/vkms_config.c649
-rw-r--r--drivers/gpu/drm/vkms/vkms_config.h489
-rw-r--r--drivers/gpu/drm/vkms/vkms_configfs.c843
-rw-r--r--drivers/gpu/drm/vkms/vkms_configfs.h8
-rw-r--r--drivers/gpu/drm/vkms/vkms_connector.c96
-rw-r--r--drivers/gpu/drm/vkms/vkms_connector.h35
-rw-r--r--drivers/gpu/drm/vkms/vkms_crtc.c239
-rw-r--r--drivers/gpu/drm/vkms/vkms_drv.c287
-rw-r--r--drivers/gpu/drm/vkms/vkms_drv.h329
-rw-r--r--drivers/gpu/drm/vkms/vkms_formats.c971
-rw-r--r--drivers/gpu/drm/vkms/vkms_formats.h21
-rw-r--r--drivers/gpu/drm/vkms/vkms_luts.c811
-rw-r--r--drivers/gpu/drm/vkms/vkms_luts.h12
-rw-r--r--drivers/gpu/drm/vkms/vkms_output.c121
-rw-r--r--drivers/gpu/drm/vkms/vkms_plane.c253
-rw-r--r--drivers/gpu/drm/vkms/vkms_writeback.c188
-rw-r--r--drivers/gpu/drm/vmwgfx/Kconfig30
-rw-r--r--drivers/gpu/drm/vmwgfx/Makefile18
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_cmd.h1513
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_devcaps.h375
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_dx.h1724
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_limits.h87
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_reg.h46
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h1561
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_types.h1555
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_escape.h56
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_overlay.h117
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga_reg.h901
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/vm_basic_types.h146
-rw-r--r--drivers/gpu/drm/vmwgfx/svga3d_reg.h1793
-rw-r--r--drivers/gpu/drm/vmwgfx/svga_escape.h89
-rw-r--r--drivers/gpu/drm/vmwgfx/svga_overlay.h201
-rw-r--r--drivers/gpu/drm/vmwgfx/svga_reg.h1346
-rw-r--r--drivers/gpu/drm/vmwgfx/svga_types.h45
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_object.c667
-rw-r--r--drivers/gpu/drm/vmwgfx/ttm_object.h319
-rw-r--r--drivers/gpu/drm/vmwgfx/vmw_surface_cache.h545
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_binding.c1468
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_binding.h241
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_blit.c642
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c897
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.h238
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c329
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c689
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c1406
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c316
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_context.c899
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c681
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c858
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h82
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c143
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.h50
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c1872
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h1534
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c4538
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fb.c717
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.c786
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fence.h110
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c562
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gem.c350
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c230
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c200
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c278
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_irq.c355
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c2521
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.h513
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c702
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_mksstat.h146
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_mob.c655
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg.c1139
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg_arm64.h204
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg_x86.h42
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c299
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c487
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_prime.c124
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_reg.h18
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource.c1737
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h154
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c1365
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_shader.c973
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_simple_resource.c231
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_so.c576
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_so.h172
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c1713
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_streamoutput.c368
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c2339
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c90
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c595
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c99
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_va.c170
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.c848
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.h190
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c633
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_vkms.h75
-rw-r--r--drivers/gpu/drm/xe/.gitignore4
-rw-r--r--drivers/gpu/drm/xe/.kunitconfig18
-rw-r--r--drivers/gpu/drm/xe/Kconfig141
-rw-r--r--drivers/gpu/drm/xe/Kconfig.debug114
-rw-r--r--drivers/gpu/drm/xe/Kconfig.profile55
-rw-r--r--drivers/gpu/drm/xe/Makefile384
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_command_header_abi.h46
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_mkhi_commands_abi.h39
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h44
-rw-r--r--drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h100
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h281
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h279
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h659
-rw-r--r--drivers/gpu/drm/xe/abi/guc_capture_abi.h186
-rw-r--r--drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h129
-rw-r--r--drivers/gpu/drm/xe/abi/guc_communication_mmio_abi.h49
-rw-r--r--drivers/gpu/drm/xe/abi/guc_errors_abi.h101
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h428
-rw-r--r--drivers/gpu/drm/xe/abi/guc_log_abi.h75
-rw-r--r--drivers/gpu/drm/xe/abi/guc_messages_abi.h251
-rw-r--r--drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h247
-rw-r--r--drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h118
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h40
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gt/intel_gt_types.h11
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_active.h22
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_active_types.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_config.h19
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h37
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_gtt_view_types.h7
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_irq.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_reg.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_reg_defs.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h18
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h36
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_clock_gating.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_mchbar_regs.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_pci_config.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h11
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_step.h14
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h162
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_uncore_trace.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_wakeref.h10
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/pxp/intel_pxp.h29
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_dram.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_gmch.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/soc/intel_rom.h6
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h42
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h6
-rw-r--r--drivers/gpu/drm/xe/display/ext/i915_irq.c85
-rw-r--r--drivers/gpu/drm/xe/display/intel_bo.c103
-rw-r--r--drivers/gpu/drm/xe/display/intel_fb_bo.c91
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c91
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c560
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.h71
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_misc.c16
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_rpm.c74
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_rpm.h11
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_wa.c19
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c79
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c482
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c213
-rw-r--r--drivers/gpu/drm/xe/display/xe_panic.c102
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c319
-rw-r--r--drivers/gpu/drm/xe/display/xe_stolen.c123
-rw-r--r--drivers/gpu/drm/xe/display/xe_tdf.c15
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_alu_commands.h79
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h18
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h164
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gpu_commands.h77
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gsc_commands.h36
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_instr_defs.h35
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mfx_commands.h28
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mi_commands.h84
-rw-r--r--drivers/gpu/drm/xe/regs/xe_bars.h12
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h220
-rw-r--r--drivers/gpu/drm/xe/regs/xe_eu_stall_regs.h29
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gsc_regs.h58
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h620
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gtt_defs.h37
-rw-r--r--drivers/gpu/drm/xe/regs/xe_guc_regs.h154
-rw-r--r--drivers/gpu/drm/xe/regs/xe_hw_error_regs.h20
-rw-r--r--drivers/gpu/drm/xe/regs/xe_i2c_regs.h23
-rw-r--r--drivers/gpu/drm/xe/regs/xe_irq_regs.h96
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h43
-rw-r--r--drivers/gpu/drm/xe/regs/xe_mchbar_regs.h48
-rw-r--r--drivers/gpu/drm/xe/regs/xe_oa_regs.h103
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pcode_regs.h27
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pmt.h35
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pxp_regs.h23
-rw-r--r--drivers/gpu/drm/xe/regs/xe_reg_defs.h137
-rw-r--r--drivers/gpu/drm/xe/regs/xe_regs.h65
-rw-r--r--drivers/gpu/drm/xe/tests/Makefile13
-rw-r--r--drivers/gpu/drm/xe/tests/xe_args_test.c221
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c637
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c301
-rw-r--r--drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c208
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c333
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c201
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c776
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c136
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_relay_test.c522
-rw-r--r--drivers/gpu/drm/xe/tests/xe_kunit_helpers.c129
-rw-r--r--drivers/gpu/drm/xe/tests/xe_kunit_helpers.h19
-rw-r--r--drivers/gpu/drm/xe/tests/xe_live_test_mod.c25
-rw-r--r--drivers/gpu/drm/xe/tests/xe_lmtt_test.c73
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c791
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs.c204
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci.c412
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.c76
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.h36
-rw-r--r--drivers/gpu/drm/xe/tests/xe_rtp_test.c548
-rw-r--r--drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c227
-rw-r--r--drivers/gpu/drm/xe/tests/xe_test.h63
-rw-r--r--drivers/gpu/drm/xe/tests/xe_test_mod.c10
-rw-r--r--drivers/gpu/drm/xe/tests/xe_wa_test.c73
-rw-r--r--drivers/gpu/drm/xe/xe_args.h143
-rw-r--r--drivers/gpu/drm/xe/xe_assert.h176
-rw-r--r--drivers/gpu/drm/xe/xe_bb.c148
-rw-r--r--drivers/gpu/drm/xe/xe_bb.h28
-rw-r--r--drivers/gpu/drm/xe/xe_bb_types.h20
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c3666
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h454
-rw-r--r--drivers/gpu/drm/xe/xe_bo_doc.h179
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c352
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.h21
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h113
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.c1291
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.h47
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c440
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.h17
-rw-r--r--drivers/gpu/drm/xe/xe_dep_job_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_dep_scheduler.c143
-rw-r--r--drivers/gpu/drm/xe/xe_dep_scheduler.h21
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c522
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.h35
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump_types.h93
-rw-r--r--drivers/gpu/drm/xe/xe_device.c1297
-rw-r--r--drivers/gpu/drm/xe/xe_device.h212
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.c296
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.h13
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h726
-rw-r--r--drivers/gpu/drm/xe/xe_device_wa_oob.rules5
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c369
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.h15
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c406
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.h70
-rw-r--r--drivers/gpu/drm/xe/xe_drv.h22
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c989
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.h25
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c367
-rw-r--r--drivers/gpu/drm/xe/xe_exec.h14
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c1251
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.h114
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h242
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c495
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.h21
-rw-r--r--drivers/gpu/drm/xe/xe_execlist_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.c261
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.h64
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake_types.h110
-rw-r--r--drivers/gpu/drm/xe/xe_gen_wa_oob.c212
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c1124
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.h61
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt_types.h85
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.c138
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.h120
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler_types.h57
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c640
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.h27
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_debugfs.c71
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.c535
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.h21
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_submit.c220
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_submit.h32
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_types.h74
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c1069
-rw-r--r--drivers/gpu/drm/xe/xe_gt.h146
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.c202
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.h24
-rw-r--r--drivers/gpu/drm/xe/xe_gt_clock.c88
-rw-r--r--drivers/gpu/drm/xe/xe_gt_clock.h16
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c379
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c304
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.h13
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.c416
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.h22
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle_types.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c888
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.h74
-rw-r--r--drivers/gpu/drm/xe/xe_gt_printk.h128
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.c283
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c2938
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h92
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h59
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c2157
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h137
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c622
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h35
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c1069
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h54
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.c147
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor.h27
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_monitor_types.h22
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c435
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h25
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c425
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h34
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service_types.h52
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h65
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_printk.h37
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c1377
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.h43
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c72
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h81
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.c101
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.h26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs.c55
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sysfs_types.h26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.c270
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.h17
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.c372
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.h56
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h370
-rw-r--r--drivers/gpu/drm/xe/xe_guard.h119
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c1719
-rw-r--r--drivers/gpu/drm/xe/xe_guc.h97
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c1013
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.h18
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf.c207
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf.h49
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c2044
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.h61
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture_types.h70
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c2148
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h86
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct_types.h173
-rw-r--r--drivers/gpu/drm/xe/xe_guc_db_mgr.c267
-rw-r--r--drivers/gpu/drm/xe/xe_guc_db_mgr.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.c146
-rw-r--r--drivers/gpu/drm/xe/xe_guc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.c521
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity_types.h102
-rw-r--r--drivers/gpu/drm/xe/xe_guc_exec_queue_types.h71
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h366
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hwconfig.c202
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hwconfig.h20
-rw-r--r--drivers/gpu/drm/xe/xe_guc_hxg_helpers.h108
-rw-r--r--drivers/gpu/drm/xe/xe_guc_id_mgr.c280
-rw-r--r--drivers/gpu/drm/xe/xe_guc_id_mgr.h22
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_helpers.c148
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_helpers.h64
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_thresholds_set.h71
-rw-r--r--drivers/gpu/drm/xe/xe_guc_klv_thresholds_set_types.h68
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.c375
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.h61
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log_types.h57
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.c95
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.h15
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c1439
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.h46
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay.c967
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay.h37
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2986
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h52
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit_types.h144
-rw-r--r--drivers/gpu/drm/xe/xe_guc_tlb_inval.c242
-rw-r--r--drivers/gpu/drm/xe/xe_guc_tlb_inval.h19
-rw-r--r--drivers/gpu/drm/xe/xe_guc_types.h129
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.c249
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.h40
-rw-r--r--drivers/gpu/drm/xe/xe_huc.c318
-rw-r--r--drivers/gpu/drm/xe/xe_huc.h28
-rw-r--r--drivers/gpu/drm/xe/xe_huc_debugfs.c71
-rw-r--r--drivers/gpu/drm/xe/xe_huc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_huc_types.h24
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.c1111
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.h82
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c685
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h43
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group.c344
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group.h29
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_types.h185
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.c182
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.h15
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.c268
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.h33
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c1334
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.h19
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.c372
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.h68
-rw-r--r--drivers/gpu/drm/xe/xe_irq.c1050
-rw-r--r--drivers/gpu/drm/xe/xe_irq.h26
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.c464
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.h17
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c573
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.h28
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_2l.c150
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_ml.c161
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt_types.h63
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c2414
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h163
-rw-r--r--drivers/gpu/drm/xe/xe_lrc_types.h61
-rw-r--r--drivers/gpu/drm/xe/xe_macros.h22
-rw-r--r--drivers/gpu/drm/xe/xe_map.h93
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c547
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.h30
-rw-r--r--drivers/gpu/drm/xe/xe_memirq_types.h37
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c2470
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.h174
-rw-r--r--drivers/gpu/drm/xe/xe_migrate_doc.h88
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c410
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.h49
-rw-r--r--drivers/gpu/drm/xe/xe_mmio_gem.c226
-rw-r--r--drivers/gpu/drm/xe/xe_mmio_gem.h20
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.c838
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_module.c183
-rw-r--r--drivers/gpu/drm/xe/xe_module.h31
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.c170
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.h15
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c2816
-rw-r--r--drivers/gpu/drm/xe/xe_oa.h25
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h271
-rw-r--r--drivers/gpu/drm/xe/xe_observation.c106
-rw-r--r--drivers/gpu/drm/xe/xe_observation.h20
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.c444
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.h19
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault_types.h136
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c580
-rw-r--r--drivers/gpu/drm/xe/xe_pat.h61
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c1255
-rw-r--r--drivers/gpu/drm/xe/xe_pci.h15
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.c266
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.h21
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h82
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.c380
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.h45
-rw-r--r--drivers/gpu/drm/xe/xe_pcode_api.h94
-rw-r--r--drivers/gpu/drm/xe/xe_platform_types.h42
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c980
-rw-r--r--drivers/gpu/drm/xe/xe_pm.h57
-rw-r--r--drivers/gpu/drm/xe/xe_pmu.c600
-rw-r--r--drivers/gpu/drm/xe/xe_pmu.h18
-rw-r--r--drivers/gpu/drm/xe/xe_pmu_types.h39
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.c184
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.h61
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence_types.h34
-rw-r--r--drivers/gpu/drm/xe/xe_printk.h129
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.c294
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.h14
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c2565
-rw-r--r--drivers/gpu/drm/xe/xe_pt.h52
-rw-r--r--drivers/gpu/drm/xe/xe_pt_types.h124
-rw-r--r--drivers/gpu/drm/xe/xe_pt_walk.c161
-rw-r--r--drivers/gpu/drm/xe/xe_pt_walk.h152
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.c949
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.h35
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_debugfs.c129
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_debugfs.h13
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.c602
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.h22
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_types.h135
-rw-r--r--drivers/gpu/drm/xe/xe_query.c812
-rw-r--r--drivers/gpu/drm/xe/xe_query.h14
-rw-r--r--drivers/gpu/drm/xe/xe_range_fence.c161
-rw-r--r--drivers/gpu/drm/xe/xe_range_fence.h75
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.c216
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr.h28
-rw-r--r--drivers/gpu/drm/xe/xe_reg_sr_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c221
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.h23
-rw-r--r--drivers/gpu/drm/xe/xe_res_cursor.h356
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c525
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.h17
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops_types.h22
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c387
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.h506
-rw-r--r--drivers/gpu/drm/xe/xe_rtp_helpers.h83
-rw-r--r--drivers/gpu/drm/xe/xe_rtp_types.h131
-rw-r--r--drivers/gpu/drm/xe/xe_sa.c151
-rw-r--r--drivers/gpu/drm/xe/xe_sa.h72
-rw-r--r--drivers/gpu/drm/xe/xe_sa_types.h19
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c359
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.h95
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job_types.h79
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.c306
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c176
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.h48
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.c520
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.h30
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.c283
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.h31
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_control.c279
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_control.h22
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c395
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h18
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_helpers.h73
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.c365
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.h30
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h37
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.c438
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.h45
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service.c216
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service.h23
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c647
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_types.h70
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_printk.h46
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_types.h40
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c211
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.h20
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.c480
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.h35
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_types.h47
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vfio.c80
-rw-r--r--drivers/gpu/drm/xe/xe_step.c259
-rw-r--r--drivers/gpu/drm/xe/xe_step.h23
-rw-r--r--drivers/gpu/drm/xe/xe_step_types.h76
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c377
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.h18
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode_types.h43
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c1537
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h389
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c406
-rw-r--r--drivers/gpu/drm/xe/xe_sync.h47
-rw-r--r--drivers/gpu/drm/xe/xe_sync_types.h31
-rw-r--r--drivers/gpu/drm/xe/xe_tile.c217
-rw-r--r--drivers/gpu/drm/xe/xe_tile.h26
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.c142
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_tile_printk.h127
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c253
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h15
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_printk.h33
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf.c350
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf.h23
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h23
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs.c61
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs.h19
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs_types.h27
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.c433
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.h46
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.c285
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.h34
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_types.h130
-rw-r--r--drivers/gpu/drm/xe/xe_trace.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h474
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.h263
-rw-r--r--drivers/gpu/drm/xe/xe_trace_guc.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_guc.h159
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.c9
-rw-r--r--drivers/gpu/drm/xe/xe_trace_lrc.h52
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c351
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h21
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_sys_mgr.c120
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_sys_mgr.h13
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.c480
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.h46
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h48
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.c247
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.h19
-rw-r--r--drivers/gpu/drm/xe/xe_uc.c316
-rw-r--r--drivers/gpu/drm/xe/xe_uc.h25
-rw-r--r--drivers/gpu/drm/xe/xe_uc_debugfs.c30
-rw-r--r--drivers/gpu/drm/xe/xe_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c956
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.h193
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_abi.h405
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_types.h155
-rw-r--r--drivers/gpu/drm/xe/xe_uc_types.h28
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.c322
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.h107
-rw-r--r--drivers/gpu/drm/xe/xe_validation.c278
-rw-r--r--drivers/gpu/drm/xe/xe_validation.h192
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c4410
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h414
-rw-r--r--drivers/gpu/drm/xe/xe_vm_doc.h555
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c431
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.h15
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h477
-rw-r--r--drivers/gpu/drm/xe/xe_vram.c462
-rw-r--r--drivers/gpu/drm/xe/xe_vram.h25
-rw-r--r--drivers/gpu/drm/xe/xe_vram_freq.c124
-rw-r--r--drivers/gpu/drm/xe/xe_vram_freq.h13
-rw-r--r--drivers/gpu/drm/xe/xe_vram_types.h85
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.c225
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.h15
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c1154
-rw-r--r--drivers/gpu/drm/xe/xe_wa.h53
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules78
-rw-r--r--drivers/gpu/drm/xe/xe_wait_user_fence.c183
-rw-r--r--drivers/gpu/drm/xe/xe_wait_user_fence.h15
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm.c273
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm.h16
-rw-r--r--drivers/gpu/drm/xe/xe_wopcm_types.h26
-rw-r--r--drivers/gpu/drm/xen/Kconfig15
-rw-r--r--drivers/gpu/drm/xen/Makefile10
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.c790
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.h156
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_cfg.c77
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_cfg.h37
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_conn.c117
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_conn.h25
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_evtchnl.c366
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_evtchnl.h81
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_gem.c306
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_gem.h40
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_kms.c383
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_kms.h26
-rw-r--r--drivers/gpu/drm/xlnx/Kconfig29
-rw-r--r--drivers/gpu/drm/xlnx/Makefile3
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp.c1407
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp.h69
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_disp_regs.h196
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp.c2525
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp.h32
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dp_audio.c448
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dpsub.c316
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_dpsub.h99
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_kms.c548
-rw-r--r--drivers/gpu/drm/xlnx/zynqmp_kms.h46
7869 files changed, 7758695 insertions, 175751 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b493663c7ba7..7e6bc0b3a589 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Drm device configuration
#
@@ -6,10 +7,16 @@
#
menuconfig DRM
tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
- depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && MMU
+ depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && HAS_DMA
+ select DRM_PANEL_ORIENTATION_QUIRKS
+ select HDMI
select I2C
- select I2C_ALGOBIT
- select SLOW_WORK
+ select DMA_SHARED_BUFFER
+ select SYNC_FILE
+# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
+# device and dmabuf fd. Let's make sure that is available for our userspace.
+ select KCMP
+ select VIDEO
help
Kernel-level support for the Direct Rendering Infrastructure (DRI)
introduced in XFree86 4.0. If you say Y here, you need to select
@@ -19,142 +26,406 @@ menuconfig DRM
details. You should also select and configure AGP
(/dev/agpgart) support if it is available for your platform.
+menu "DRM debugging options"
+depends on DRM
+source "drivers/gpu/drm/Kconfig.debug"
+endmenu
+
+if DRM
+
+config DRM_MIPI_DBI
+ tristate
+ depends on DRM
+ select DRM_KMS_HELPER
+
+config DRM_MIPI_DSI
+ bool
+ depends on DRM
+
config DRM_KMS_HELPER
tristate
depends on DRM
- select FB
- select FRAMEBUFFER_CONSOLE if !EXPERT
- select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE
+ select FB_CORE if DRM_FBDEV_EMULATION
+ help
+ CRTC helpers for KMS drivers.
+
+config DRM_DRAW
+ bool
+ depends on DRM
+
+config DRM_PANIC
+ bool "Display a user-friendly message when a kernel panic occurs"
+ depends on DRM
+ select FONT_SUPPORT
+ select DRM_DRAW
+ help
+ Enable a drm panic handler, which will display a user-friendly message
+ when a kernel panic occurs. It's useful when using a user-space
+ console instead of fbcon.
+ It will only work if your graphics driver supports this feature.
+ To support Hi-DPI displays, you can enable bigger fonts like
+ FONT_TER16x32.
+
+config DRM_PANIC_FOREGROUND_COLOR
+ hex "Drm panic screen foreground color, in RGB"
+ depends on DRM_PANIC
+ default 0xffffff
+
+config DRM_PANIC_BACKGROUND_COLOR
+ hex "Drm panic screen background color, in RGB"
+ depends on DRM_PANIC
+ default 0x000000
+
+config DRM_PANIC_DEBUG
+ bool "Add a debug fs entry to trigger drm_panic"
+ depends on DRM_PANIC && DEBUG_FS
+ help
+ Add dri/[device]/drm_panic_plane_x to the kernel debugfs, to force the
+ panic handler to write the panic message to that plane's scanout buffer.
+ This is unsafe and should not be enabled on a production build.
+ If in doubt, say "N".
+
+config DRM_PANIC_SCREEN
+ string "Panic screen formatter"
+ default "user"
+ depends on DRM_PANIC
+ help
+ This option selects what will be displayed when a kernel
+ panic occurs. You can choose between "user", a short message telling
+ the user to reboot the system, or "kmsg", which will display the last
+ lines of kmsg.
+ This can also be overridden by the drm.panic_screen=xxxx kernel parameter
+ or by writing to the /sys/module/drm/parameters/panic_screen sysfs entry.
+ Default is "user".
+
+config DRM_PANIC_SCREEN_QR_CODE
+ bool "Add a panic screen with a QR code"
+ depends on DRM_PANIC && RUST
+ select ZLIB_DEFLATE
+ help
+ This option adds a QR code generator, and a panic screen with a QR
+ code. The QR code will contain the last lines of kmsg and other debug
+ information. This should make it easier for the user to report a kernel
+ panic, with all debug information available.
+ To use this panic screen, also set DRM_PANIC_SCREEN to "qr_code".
+
+config DRM_PANIC_SCREEN_QR_CODE_URL
+ string "Base URL of the QR code in the panic screen"
+ depends on DRM_PANIC_SCREEN_QR_CODE
+ help
+ This option sets the base URL to report the kernel panic. If it's set,
+ the QR code will contain the URL and the kmsg compressed with zlib as
+ a URL parameter. If it's empty, the QR code will contain the kmsg as
+ uncompressed text only.
+ There is demo code in JavaScript to decode and uncompress the kmsg
+ data from the URL parameter at https://github.com/kdj0c/panic_report
+
+config DRM_PANIC_SCREEN_QR_VERSION
+ int "Maximum version (size) of the QR code."
+ depends on DRM_PANIC_SCREEN_QR_CODE
+ default 40
+ help
+ This option limits the version (or size) of the QR code. QR code
+ version ranges from Version 1 (21x21) to Version 40 (177x177).
+ Smaller QR codes are easier to read, but will contain less debugging
+ data. Default is 40.
+
+config DRM_DEBUG_DP_MST_TOPOLOGY_REFS
+ bool "Enable refcount backtrace history in the DP MST helpers"
+ depends on STACKTRACE_SUPPORT
+ select STACKDEPOT
+ select DRM_KMS_HELPER
+ depends on DEBUG_KERNEL
+ depends on EXPERT
+ help
+ Enables debug tracing for topology refs in DRM's DP MST helpers. A
+ history of each topology reference/dereference will be printed to the
+ kernel log once a port or branch device's topology refcount reaches 0.
+
+ This has the potential to use a lot of memory and print some very
+ large kernel messages. If in doubt, say "N".
+
+config DRM_DEBUG_MODESET_LOCK
+ bool "Enable backtrace history for lock contention"
+ depends on STACKTRACE_SUPPORT
+ depends on DEBUG_KERNEL
+ depends on EXPERT
+ select STACKDEPOT
+ default y if DEBUG_WW_MUTEX_SLOWPATH
+ help
+ Enable debug tracing of failures to gracefully handle drm modeset lock
+ contention. A history of each drm modeset lock path hitting -EDEADLK
+ will be saved until gracefully handled, and the backtrace will be
+ printed when attempting to lock a contended lock.
+
+ If in doubt, say "N".
+
+config DRM_CLIENT
+ bool
+ depends on DRM
+ help
+ Enables support for DRM clients. DRM drivers that need
+ struct drm_client_dev and its interfaces should select this
+ option. Drivers that support the default clients should
+ select DRM_CLIENT_SELECTION instead.
+
+source "drivers/gpu/drm/clients/Kconfig"
+
+config DRM_LOAD_EDID_FIRMWARE
+ bool "Allow to specify an EDID data set instead of probing for it"
+ depends on DRM
help
- FB and CRTC helpers for KMS drivers.
+ Say Y here if you want EDID data to be loaded from the
+ /lib/firmware directory or one of the provided built-in
+ data sets. This may be necessary if the graphics adapter or
+ monitor is unable to provide appropriate EDID data. Since this
+ feature is provided as a workaround for broken hardware, the
+ default case is N. Details and instructions on how to build your own
+ EDID data are given in Documentation/admin-guide/edid.rst.
+
+source "drivers/gpu/drm/display/Kconfig"
config DRM_TTM
tristate
- depends on DRM
+ depends on DRM && MMU
+ select SHMEM
help
GPU memory management subsystem for devices with multiple
GPU memory types. Will be enabled automatically if a device driver
uses it.
-config DRM_TDFX
- tristate "3dfx Banshee/Voodoo3+"
- depends on DRM && PCI
- help
- Choose this option if you have a 3dfx Banshee or Voodoo3 (or later),
- graphics card. If M is selected, the module will be called tdfx.
-
-config DRM_R128
- tristate "ATI Rage 128"
- depends on DRM && PCI
- select FW_LOADER
- help
- Choose this option if you have an ATI Rage 128 graphics card. If M
- is selected, the module will be called r128. AGP support for
- this card is strongly suggested (unless you have a PCI version).
-
-config DRM_RADEON
- tristate "ATI Radeon"
- depends on DRM && PCI
- select FB_CFB_FILLRECT
- select FB_CFB_COPYAREA
- select FB_CFB_IMAGEBLIT
- select FW_LOADER
- select DRM_KMS_HELPER
- select DRM_TTM
- select POWER_SUPPLY
- select HWMON
+config DRM_EXEC
+ tristate
+ depends on DRM
help
- Choose this option if you have an ATI Radeon graphics card. There
- are both PCI and AGP versions. You don't need to choose this to
- run the Radeon in plain VGA mode.
+ Execution context for command submissions
- If M is selected, the module will be called radeon.
+config DRM_GPUVM
+ tristate
+ depends on DRM
+ select DRM_EXEC
+ help
+ GPU-VM representation providing helpers to manage a GPU's virtual
+ address space.
-source "drivers/gpu/drm/radeon/Kconfig"
+config DRM_GPUSVM
+ tristate
+ depends on DRM && DEVICE_PRIVATE
+ select HMM_MIRROR
+ select MMU_NOTIFIER
+ help
+ GPU-SVM representation providing helpers to manage a GPU's shared
+ virtual memory.
-config DRM_I810
- tristate "Intel I810"
- # !PREEMPT because of missing ioctl locking
- depends on DRM && AGP && AGP_INTEL && (!PREEMPT || BROKEN)
+config DRM_BUDDY
+ tristate
+ depends on DRM
help
- Choose this option if you have an Intel I810 graphics card. If M is
- selected, the module will be called i810. AGP support is required
- for this driver to work.
+ A page-based buddy allocator.
-config DRM_I915
- tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
+config DRM_VRAM_HELPER
+ tristate
depends on DRM
- depends on AGP
- depends on AGP_INTEL
- # we need shmfs for the swappable backing store, and in particular
- # the shmem_readpage() which depends upon tmpfs
- select SHMEM
- select TMPFS
+ help
+ Helpers for VRAM memory management
+
+config DRM_TTM_HELPER
+ tristate
+ depends on DRM
+ select DRM_TTM
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
+ help
+ Helpers for ttm-based gem objects
+
+config DRM_GEM_DMA_HELPER
+ tristate
+ depends on DRM
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_DMAMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
+ help
+ Choose this if you need the GEM DMA helper functions
+
+config DRM_GEM_SHMEM_HELPER
+ tristate
+ depends on DRM && MMU
+ select DRM_KMS_HELPER if DRM_FBDEV_EMULATION
+ select FB_CORE if DRM_FBDEV_EMULATION
+ select FB_SYSMEM_HELPERS_DEFERRED if DRM_FBDEV_EMULATION
+ help
+ Choose this if you need the GEM shmem helper functions
+
+config DRM_SUBALLOC_HELPER
+ tristate
+ depends on DRM
+
+config DRM_SCHED
+ tristate
+ depends on DRM
+
+source "drivers/gpu/drm/sysfb/Kconfig"
+
+source "drivers/gpu/drm/arm/Kconfig"
+
+source "drivers/gpu/drm/radeon/Kconfig"
+
+source "drivers/gpu/drm/amd/amdgpu/Kconfig"
+
+source "drivers/gpu/drm/nouveau/Kconfig"
+
+source "drivers/gpu/drm/nova/Kconfig"
+
+source "drivers/gpu/drm/i915/Kconfig"
+
+source "drivers/gpu/drm/xe/Kconfig"
+
+source "drivers/gpu/drm/kmb/Kconfig"
+
+config DRM_VGEM
+ tristate "Virtual GEM provider"
+ depends on DRM && MMU
+ select DRM_GEM_SHMEM_HELPER
+ help
+ Choose this option to get a virtual graphics memory manager,
+ as used by Mesa's software renderer for enhanced performance.
+ If M is selected the module will be called vgem.
+
+source "drivers/gpu/drm/vkms/Kconfig"
+
+source "drivers/gpu/drm/exynos/Kconfig"
+
+source "drivers/gpu/drm/rockchip/Kconfig"
+
+source "drivers/gpu/drm/vmwgfx/Kconfig"
+
+source "drivers/gpu/drm/gma500/Kconfig"
+
+source "drivers/gpu/drm/udl/Kconfig"
+
+source "drivers/gpu/drm/ast/Kconfig"
+
+source "drivers/gpu/drm/mgag200/Kconfig"
+
+source "drivers/gpu/drm/armada/Kconfig"
+
+source "drivers/gpu/drm/atmel-hlcdc/Kconfig"
+
+source "drivers/gpu/drm/renesas/Kconfig"
+
+source "drivers/gpu/drm/sun4i/Kconfig"
+
+source "drivers/gpu/drm/omapdrm/Kconfig"
+
+source "drivers/gpu/drm/tilcdc/Kconfig"
+
+source "drivers/gpu/drm/qxl/Kconfig"
+
+source "drivers/gpu/drm/virtio/Kconfig"
+
+source "drivers/gpu/drm/msm/Kconfig"
+
+source "drivers/gpu/drm/fsl-dcu/Kconfig"
+
+source "drivers/gpu/drm/tegra/Kconfig"
+
+source "drivers/gpu/drm/stm/Kconfig"
+
+source "drivers/gpu/drm/panel/Kconfig"
+
+source "drivers/gpu/drm/bridge/Kconfig"
+
+source "drivers/gpu/drm/sti/Kconfig"
+
+source "drivers/gpu/drm/imx/Kconfig"
+
+source "drivers/gpu/drm/ingenic/Kconfig"
+
+source "drivers/gpu/drm/v3d/Kconfig"
+
+source "drivers/gpu/drm/vc4/Kconfig"
+
+source "drivers/gpu/drm/loongson/Kconfig"
+
+source "drivers/gpu/drm/etnaviv/Kconfig"
+
+source "drivers/gpu/drm/hisilicon/Kconfig"
+
+source "drivers/gpu/drm/logicvc/Kconfig"
+
+source "drivers/gpu/drm/mediatek/Kconfig"
+
+source "drivers/gpu/drm/mxsfb/Kconfig"
+
+source "drivers/gpu/drm/meson/Kconfig"
+
+source "drivers/gpu/drm/tiny/Kconfig"
+
+source "drivers/gpu/drm/pl111/Kconfig"
+
+source "drivers/gpu/drm/tve200/Kconfig"
+
+source "drivers/gpu/drm/xen/Kconfig"
+
+source "drivers/gpu/drm/vboxvideo/Kconfig"
+
+source "drivers/gpu/drm/lima/Kconfig"
+
+source "drivers/gpu/drm/panfrost/Kconfig"
+
+source "drivers/gpu/drm/panthor/Kconfig"
+
+source "drivers/gpu/drm/aspeed/Kconfig"
+
+source "drivers/gpu/drm/mcde/Kconfig"
+
+source "drivers/gpu/drm/tidss/Kconfig"
+
+source "drivers/gpu/drm/adp/Kconfig"
+
+source "drivers/gpu/drm/xlnx/Kconfig"
+
+source "drivers/gpu/drm/gud/Kconfig"
+
+source "drivers/gpu/drm/sitronix/Kconfig"
+
+source "drivers/gpu/drm/solomon/Kconfig"
+
+source "drivers/gpu/drm/sprd/Kconfig"
+
+source "drivers/gpu/drm/imagination/Kconfig"
+
+source "drivers/gpu/drm/tyr/Kconfig"
+
+config DRM_HYPERV
+ tristate "DRM Support for Hyper-V synthetic video device"
+ depends on DRM && PCI && HYPERV_VMBUS
+ select DRM_CLIENT_SELECTION
select DRM_KMS_HELPER
- select FB_CFB_FILLRECT
- select FB_CFB_COPYAREA
- select FB_CFB_IMAGEBLIT
- # i915 depends on ACPI_VIDEO when ACPI is enabled
- # but for select to work, need to select ACPI_VIDEO's dependencies, ick
- select BACKLIGHT_CLASS_DEVICE if ACPI
- select VIDEO_OUTPUT_CONTROL if ACPI
- select INPUT if ACPI
- select ACPI_VIDEO if ACPI
- select ACPI_BUTTON if ACPI
- help
- Choose this option if you have a system that has "Intel Graphics
- Media Accelerator" or "HD Graphics" integrated graphics,
- including 830M, 845G, 852GM, 855GM, 865G, 915G, 945G, 965G,
- G35, G41, G43, G45 chipsets and Celeron, Pentium, Core i3,
- Core i5, Core i7 as well as Atom CPUs with integrated graphics.
- If M is selected, the module will be called i915. AGP support
- is required for this driver to work. This driver is used by
- the Intel driver in X.org 6.8 and XFree86 4.4 and above. It
- replaces the older i830 module that supported a subset of the
- hardware in older X.org releases.
-
- Note that the older i810/i815 chipsets require the use of the
- i810 driver instead, and the Atom z5xx series has an entirely
- different implementation.
-
-config DRM_I915_KMS
- bool "Enable modesetting on intel by default"
- depends on DRM_I915
- help
- Choose this option if you want kernel modesetting enabled by default,
- and you have a new enough userspace to support this. Running old
- userspaces with this enabled will cause pain. Note that this causes
- the driver to bind to PCI devices, which precludes loading things
- like intelfb.
-
-config DRM_MGA
- tristate "Matrox g200/g400"
- depends on DRM && PCI
- select FW_LOADER
- help
- Choose this option if you have a Matrox G200, G400 or G450 graphics
- card. If M is selected, the module will be called mga. AGP
- support is required for this driver to work.
-
-config DRM_SIS
- tristate "SiS video cards"
- depends on DRM && AGP
- depends on FB_SIS || FB_SIS=n
- help
- Choose this option if you have a SiS 630 or compatible video
- chipset. If M is selected the module will be called sis. AGP
- support is required for this driver to work.
-
-config DRM_VIA
- tristate "Via unichrome video cards"
- depends on DRM && PCI
- help
- Choose this option if you have a Via unichrome or compatible video
- chipset. If M is selected the module will be called via.
-
-config DRM_SAVAGE
- tristate "Savage video cards"
- depends on DRM && PCI
- help
- Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
- chipset. If M is selected the module will be called savage.
+ select DRM_GEM_SHMEM_HELPER
+ help
+ This is a KMS driver for the Hyper-V synthetic video device. Choose this
+ option if you would like to enable the DRM driver for Hyper-V virtual
+ machines. Unselect the Hyper-V framebuffer driver (CONFIG_FB_HYPERV) so
+ that the DRM driver is used by default.
+
+ If M is selected the module will be called hyperv_drm.
+
+# Separate option as not all DRM drivers use it
+config DRM_PANEL_BACKLIGHT_QUIRKS
+ tristate
+
+config DRM_LIB_RANDOM
+ bool
+ default n
+
+config DRM_PRIVACY_SCREEN
+ bool
+ default n
+
+endif
+
+# Separate option because drm_panel_orientation_quirks.c is shared with fbdev
+config DRM_PANEL_ORIENTATION_QUIRKS
+ tristate
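
As background for the DRM_PANIC options above: the panic handler can only draw if a driver hands it a CPU-accessible view of the plane being scanned out. The sketch below shows roughly what that hook looks like; the get_scanout_buffer callback and the drm_scanout_buffer fields are recalled from the drm_panic helpers and should be treated as assumptions, as are all foo_* names.

#include <drm/drm_fourcc.h>
#include <drm/drm_modeset_helper_vtables.h>
#include <drm/drm_panic.h>
#include <linux/iosys-map.h>

/* foo_device, to_foo_device() and the fb_* fields are hypothetical driver state. */
static int foo_get_scanout_buffer(struct drm_plane *plane,
                                  struct drm_scanout_buffer *sb)
{
        struct foo_device *foo = to_foo_device(plane->dev);

        /* Describe the buffer currently being scanned out. */
        sb->format = drm_format_info(DRM_FORMAT_XRGB8888);
        sb->width = foo->mode.hdisplay;
        sb->height = foo->mode.vdisplay;
        sb->pitch[0] = foo->fb_pitch;
        iosys_map_set_vaddr(&sb->map[0], foo->fb_vaddr);

        return 0;
}

static const struct drm_plane_helper_funcs foo_plane_helper_funcs = {
        /* ...the usual atomic_check/atomic_update hooks... */
        .get_scanout_buffer = foo_get_scanout_buffer,
};
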
diff --git a/drivers/gpu/drm/Kconfig.debug b/drivers/gpu/drm/Kconfig.debug
new file mode 100644
index 000000000000..05dc43c0b8c5
--- /dev/null
+++ b/drivers/gpu/drm/Kconfig.debug
@@ -0,0 +1,117 @@
+config DRM_USE_DYNAMIC_DEBUG
+ bool "use dynamic debug to implement drm.debug"
+ default n
+ depends on BROKEN
+ depends on DRM
+ depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE
+ depends on JUMP_LABEL
+ help
+ Use dynamic-debug to avoid drm_debug_enabled() runtime overheads.
+ Due to callsite counts in DRM drivers (~4k in amdgpu) and 56
+ bytes per callsite, the .data costs can be substantial, and
+ are therefore configurable.
+
+config DRM_WERROR
+ bool "Compile the drm subsystem with warnings as errors"
+ depends on DRM && EXPERT
+ depends on !WERROR
+ default n
+ help
+ A kernel build should not cause any compiler warnings, and this
+ enables the '-Werror' flag to enforce that rule in the drm subsystem.
+
+ The drm subsystem enables more warnings than the kernel default, so
+ this config option is disabled by default.
+
+ If in doubt, say N.
+
+config DRM_HEADER_TEST
+ bool "Ensure DRM headers are self-contained and pass kernel-doc"
+ depends on DRM && EXPERT && BROKEN
+ default n
+ help
+ Ensure that the DRM subsystem headers under both drivers/gpu/drm and
+ include/drm compile, are self-contained, have header guards, and have
+ no kernel-doc warnings.
+
+ If in doubt, say N.
+
+config DRM_DEBUG_MM
+ bool "Insert extra checks and debug info into the DRM range managers"
+ default n
+ depends on DRM
+ depends on STACKTRACE_SUPPORT
+ select STACKDEPOT
+ help
+ Enable allocation tracking of the memory manager and leak detection on
+ shutdown.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_KUNIT_TEST_HELPERS
+ tristate
+ depends on DRM && KUNIT
+ select DRM_KMS_HELPER
+ help
+ KUnit Helpers for KMS drivers.
+
+config DRM_KUNIT_TEST
+ tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS
+ depends on DRM && KUNIT && MMU
+ select DRM_BRIDGE_CONNECTOR
+ select DRM_BUDDY
+ select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_HDMI_STATE_HELPER
+ select DRM_DISPLAY_HELPER
+ select DRM_EXEC
+ select DRM_EXPORT_FOR_TESTS if m
+ select DRM_GEM_SHMEM_HELPER
+ select DRM_KUNIT_TEST_HELPERS
+ select DRM_LIB_RANDOM
+ select DRM_SYSFB_HELPER
+ select PRIME_NUMBERS
+ default KUNIT_ALL_TESTS
+ help
+ This builds unit tests for DRM. This option is not useful for
+ distributions or general kernels, but only for kernel
+ developers working on DRM and associated drivers.
+
+ For more information on KUnit and unit tests in general,
+ please refer to the KUnit documentation in
+ Documentation/dev-tools/kunit/.
+
+ If in doubt, say "N".
+
+config DRM_TTM_KUNIT_TEST
+ tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS
+ default n
+ depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST)
+ select DRM_TTM
+ select DRM_BUDDY
+ select DRM_EXPORT_FOR_TESTS if m
+ select DRM_KUNIT_TEST_HELPERS
+ default KUNIT_ALL_TESTS
+ help
+ Enables unit tests for TTM, a GPU memory manager subsystem used
+ to manage memory buffers. This option is mostly useful for kernel
+ developers. It depends on (UML || COMPILE_TEST) since no other driver
+ which uses TTM can be loaded while running the tests.
+
+ If in doubt, say "N".
+
+config DRM_SCHED_KUNIT_TEST
+ tristate "KUnit tests for the DRM scheduler" if !KUNIT_ALL_TESTS
+ select DRM_SCHED
+ depends on DRM && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Choose this option to build unit tests for the DRM scheduler.
+
+ Recommended for driver developers only.
+
+ If in doubt, say "N".
+
+config DRM_EXPORT_FOR_TESTS
+ bool
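
For context on DRM_USE_DYNAMIC_DEBUG above: the drm.debug module parameter normally gates every drm_dbg_*() call through a runtime drm_debug_enabled() check, and the dynamic-debug variant instead turns each callsite into individually controllable descriptor data, which is where the per-callsite .data cost quoted above comes from. A hypothetical callsite of the kind being discussed (the foo_* name is invented):

#include <drm/drm_device.h>
#include <drm/drm_print.h>

static void foo_commit_plane(struct drm_device *drm)
{
        /* Gated by drm.debug & DRM_UT_KMS; with DRM_USE_DYNAMIC_DEBUG this
         * callsite is instead controlled through dynamic debug. */
        drm_dbg_kms(drm, "committing primary plane\n");

        /* Guard expensive debug-only work explicitly. */
        if (drm_debug_enabled(DRM_UT_ATOMIC))
                drm_dbg_atomic(drm, "full atomic state dump follows\n");
}
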
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 89cf05a72d1c..0e1c668b46d2 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -1,38 +1,255 @@
-#
+# SPDX-License-Identifier: GPL-2.0
+
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-ccflags-y := -Iinclude/drm
+CFLAGS-$(CONFIG_DRM_USE_DYNAMIC_DEBUG) += -DDYNAMIC_DEBUG_MODULE
-drm-y := drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \
- drm_context.o drm_dma.o \
- drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \
- drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \
- drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \
- drm_platform.o drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \
- drm_crtc.o drm_modes.o drm_edid.o \
- drm_info.o drm_debugfs.o drm_encoder_slave.o \
- drm_trace_points.o drm_global.o drm_usb.o
+# Unconditionally enable W=1 warnings locally
+# --- begin copy-paste W=1 warnings from scripts/Makefile.warn
+subdir-ccflags-y += -Wextra -Wunused -Wno-unused-parameter
+subdir-ccflags-y += $(call cc-option, -Wrestrict)
+subdir-ccflags-y += -Wmissing-format-attribute
+subdir-ccflags-y += -Wold-style-definition
+subdir-ccflags-y += -Wmissing-include-dirs
+subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
+subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
+subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
+subdir-ccflags-y += $(call cc-option, -Wformat-overflow)
+# FIXME: fix -Wformat-truncation warnings and uncomment
+#subdir-ccflags-y += $(call cc-option, -Wformat-truncation)
+subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
+# The following turn off the warnings enabled by -Wextra
+ifeq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-missing-field-initializers
+subdir-ccflags-y += -Wno-type-limits
+subdir-ccflags-y += -Wno-shift-negative-value
+endif
+ifeq ($(findstring 3, $(KBUILD_EXTRA_WARN)),)
+subdir-ccflags-y += -Wno-sign-compare
+endif
+# --- end copy-paste
+# Enable -Werror in CI and development
+subdir-ccflags-$(CONFIG_DRM_WERROR) += -Werror
+
+drm-y := \
+ drm_atomic.o \
+ drm_atomic_uapi.o \
+ drm_auth.o \
+ drm_blend.o \
+ drm_bridge.o \
+ drm_cache.o \
+ drm_color_mgmt.o \
+ drm_colorop.o \
+ drm_connector.o \
+ drm_crtc.o \
+ drm_displayid.o \
+ drm_drv.o \
+ drm_dumb_buffers.o \
+ drm_edid.o \
+ drm_eld.o \
+ drm_encoder.o \
+ drm_file.o \
+ drm_fourcc.o \
+ drm_framebuffer.o \
+ drm_gem.o \
+ drm_ioctl.o \
+ drm_lease.o \
+ drm_managed.o \
+ drm_mm.o \
+ drm_mode_config.o \
+ drm_mode_object.o \
+ drm_modes.o \
+ drm_modeset_lock.o \
+ drm_plane.o \
+ drm_prime.o \
+ drm_print.o \
+ drm_property.o \
+ drm_rect.o \
+ drm_syncobj.o \
+ drm_sysfs.o \
+ drm_trace_points.o \
+ drm_vblank.o \
+ drm_vblank_work.o \
+ drm_vma_manager.o \
+ drm_writeback.o
+drm-$(CONFIG_DRM_CLIENT) += \
+ drm_client.o \
+ drm_client_event.o \
+ drm_client_modeset.o \
+ drm_client_sysrq.o
+drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
drm-$(CONFIG_COMPAT) += drm_ioc32.o
+drm-$(CONFIG_DRM_PANEL) += drm_panel.o
+drm-$(CONFIG_OF) += drm_of.o
+drm-$(CONFIG_PCI) += drm_pci.o
+drm-$(CONFIG_DEBUG_FS) += \
+ drm_debugfs.o \
+ drm_debugfs_crc.o
+drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
+drm-$(CONFIG_DRM_PRIVACY_SCREEN) += \
+ drm_privacy_screen.o \
+ drm_privacy_screen_x86.o
+drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o
+drm-$(CONFIG_DRM_PANIC) += drm_panic.o
+drm-$(CONFIG_DRM_DRAW) += drm_draw.o
+drm-$(CONFIG_DRM_PANIC_SCREEN_QR_CODE) += drm_panic_qr.o
+obj-$(CONFIG_DRM) += drm.o
+
+obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
+obj-$(CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS) += drm_panel_backlight_quirks.o
+
+#
+# Memory-management helpers
+#
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
+obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o
+
+drm_gpusvm_helper-y := \
+ drm_gpusvm.o\
+ drm_pagemap.o
+obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o
+
+obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
+
+drm_dma_helper-y := drm_gem_dma_helper.o
+drm_dma_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_dma.o
+drm_dma_helper-$(CONFIG_DRM_KMS_HELPER) += drm_fb_dma_helper.o
+obj-$(CONFIG_DRM_GEM_DMA_HELPER) += drm_dma_helper.o
+
+drm_shmem_helper-y := drm_gem_shmem_helper.o
+drm_shmem_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_shmem.o
+obj-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_shmem_helper.o
-drm_kms_helper-y := drm_fb_helper.o drm_crtc_helper.o drm_dp_i2c_helper.o
+drm_suballoc_helper-y := drm_suballoc.o
+obj-$(CONFIG_DRM_SUBALLOC_HELPER) += drm_suballoc_helper.o
+drm_vram_helper-y := drm_gem_vram_helper.o
+obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
+
+drm_ttm_helper-y := drm_gem_ttm_helper.o
+drm_ttm_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_ttm.o
+obj-$(CONFIG_DRM_TTM_HELPER) += drm_ttm_helper.o
+
+#
+# Modesetting helpers
+#
+
+drm_kms_helper-y := \
+ drm_atomic_helper.o \
+ drm_atomic_state_helper.o \
+ drm_bridge_helper.o \
+ drm_crtc_helper.o \
+ drm_damage_helper.o \
+ drm_flip_work.o \
+ drm_format_helper.o \
+ drm_gem_atomic_helper.o \
+ drm_gem_framebuffer_helper.o \
+ drm_kms_helper_common.o \
+ drm_modeset_helper.o \
+ drm_plane_helper.o \
+ drm_probe_helper.o \
+ drm_self_refresh_helper.o \
+ drm_simple_kms_helper.o \
+ drm_vblank_helper.o
+drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
+drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
-CFLAGS_drm_trace_points.o := -I$(src)
+#
+# Drivers and the rest
+#
-obj-$(CONFIG_DRM) += drm.o
+obj-y += tests/
+
+obj-$(CONFIG_DRM_MIPI_DBI) += drm_mipi_dbi.o
+obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
+obj-y += arm/
+obj-y += clients/
+obj-y += display/
obj-$(CONFIG_DRM_TTM) += ttm/
-obj-$(CONFIG_DRM_TDFX) += tdfx/
-obj-$(CONFIG_DRM_R128) += r128/
+obj-$(CONFIG_DRM_SCHED) += scheduler/
obj-$(CONFIG_DRM_RADEON)+= radeon/
-obj-$(CONFIG_DRM_MGA) += mga/
-obj-$(CONFIG_DRM_I810) += i810/
-obj-$(CONFIG_DRM_I915) += i915/
-obj-$(CONFIG_DRM_SIS) += sis/
-obj-$(CONFIG_DRM_SAVAGE)+= savage/
+obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
+obj-$(CONFIG_DRM_AMDGPU)+= amd/amdxcp/
+obj-$(CONFIG_DRM_I915) += i915/
+obj-$(CONFIG_DRM_XE) += xe/
+obj-$(CONFIG_DRM_KMB_DISPLAY) += kmb/
+obj-$(CONFIG_DRM_MGAG200) += mgag200/
+obj-$(CONFIG_DRM_V3D) += v3d/
+obj-$(CONFIG_DRM_VC4) += vc4/
obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
-obj-$(CONFIG_DRM_VIA) +=via/
+obj-$(CONFIG_DRM_VGEM) += vgem/
+obj-$(CONFIG_DRM_VKMS) += vkms/
obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/
-obj-y += i2c/
+obj-$(CONFIG_DRM_NOVA) += nova/
+obj-$(CONFIG_DRM_EXYNOS) +=exynos/
+obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/
+obj-$(CONFIG_DRM_GMA500) += gma500/
+obj-$(CONFIG_DRM_UDL) += udl/
+obj-$(CONFIG_DRM_AST) += ast/
+obj-$(CONFIG_DRM_ARMADA) += armada/
+obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/
+obj-y += renesas/
+obj-y += omapdrm/
+obj-$(CONFIG_DRM_SUN4I) += sun4i/
+obj-y += tilcdc/
+obj-$(CONFIG_DRM_QXL) += qxl/
+obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/
+obj-$(CONFIG_DRM_MSM) += msm/
+obj-$(CONFIG_DRM_TEGRA) += tegra/
+obj-$(CONFIG_DRM_STM) += stm/
+obj-$(CONFIG_DRM_STI) += sti/
+obj-y += imx/
+obj-$(CONFIG_DRM_INGENIC) += ingenic/
+obj-$(CONFIG_DRM_LOGICVC) += logicvc/
+obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
+obj-$(CONFIG_DRM_MESON) += meson/
+obj-y += panel/
+obj-y += bridge/
+obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/
+obj-$(CONFIG_DRM_ETNAVIV) += etnaviv/
+obj-y += hisilicon/
+obj-y += mxsfb/
+obj-y += sysfb/
+obj-y += tiny/
+obj-$(CONFIG_DRM_PL111) += pl111/
+obj-$(CONFIG_DRM_TVE200) += tve200/
+obj-$(CONFIG_DRM_ADP) += adp/
+obj-$(CONFIG_DRM_XEN) += xen/
+obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
+obj-$(CONFIG_DRM_LIMA) += lima/
+obj-$(CONFIG_DRM_PANFROST) += panfrost/
+obj-$(CONFIG_DRM_PANTHOR) += panthor/
+obj-$(CONFIG_DRM_TYR) += tyr/
+obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
+obj-$(CONFIG_DRM_MCDE) += mcde/
+obj-$(CONFIG_DRM_TIDSS) += tidss/
+obj-y += xlnx/
+obj-y += gud/
+obj-$(CONFIG_DRM_HYPERV) += hyperv/
+obj-y += sitronix/
+obj-y += solomon/
+obj-$(CONFIG_DRM_SPRD) += sprd/
+obj-$(CONFIG_DRM_LOONGSON) += loongson/
+obj-$(CONFIG_DRM_POWERVR) += imagination/
+
+# Ensure drm headers are self-contained and pass kernel-doc
+hdrtest-files := \
+ $(shell cd $(src) && find . -maxdepth 1 -name 'drm_*.h') \
+ $(shell cd $(src) && find display lib -name '*.h')
+
+always-$(CONFIG_DRM_HEADER_TEST) += \
+ $(patsubst %.h,%.hdrtest, $(hdrtest-files))
+
+# Include the header twice to detect missing include guard.
+quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
+ cmd_hdrtest = \
+ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \
+ PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \
+ touch $@
+
+$(obj)/%.hdrtest: $(src)/%.h FORCE
+ $(call if_changed_dep,hdrtest)
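
The hdrtest rule above compiles each header twice with -fsyntax-only, so a missing include guard shows up as a redefinition error, and then runs kernel-doc over it. A hypothetical header that would pass those checks could look roughly like this (all foo_* names are invented):

/* SPDX-License-Identifier: MIT */
#ifndef __FOO_HELPER_H__
#define __FOO_HELPER_H__        /* guard: the double -include above catches its absence */

#include <linux/types.h>        /* self-contained: include everything the header needs */

struct drm_device;              /* forward declaration instead of a heavy include */

/**
 * foo_helper_count_crtcs - example kernel-doc comment checked by $(KERNELDOC) -none
 * @dev: DRM device to inspect
 *
 * Returns: the number of CRTCs registered on @dev.
 */
u32 foo_helper_count_crtcs(struct drm_device *dev);

#endif /* __FOO_HELPER_H__ */
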
diff --git a/drivers/gpu/drm/README.drm b/drivers/gpu/drm/README.drm
deleted file mode 100644
index b5b332722581..000000000000
--- a/drivers/gpu/drm/README.drm
+++ /dev/null
@@ -1,43 +0,0 @@
-************************************************************
-* For the very latest on DRI development, please see: *
-* http://dri.freedesktop.org/ *
-************************************************************
-
-The Direct Rendering Manager (drm) is a device-independent kernel-level
-device driver that provides support for the XFree86 Direct Rendering
-Infrastructure (DRI).
-
-The DRM supports the Direct Rendering Infrastructure (DRI) in four major
-ways:
-
- 1. The DRM provides synchronized access to the graphics hardware via
- the use of an optimized two-tiered lock.
-
- 2. The DRM enforces the DRI security policy for access to the graphics
- hardware by only allowing authenticated X11 clients access to
- restricted regions of memory.
-
- 3. The DRM provides a generic DMA engine, complete with multiple
- queues and the ability to detect the need for an OpenGL context
- switch.
-
- 4. The DRM is extensible via the use of small device-specific modules
- that rely extensively on the API exported by the DRM module.
-
-
-Documentation on the DRI is available from:
- http://dri.freedesktop.org/wiki/Documentation
- http://sourceforge.net/project/showfiles.php?group_id=387
- http://dri.sourceforge.net/doc/
-
-For specific information about kernel-level support, see:
-
- The Direct Rendering Manager, Kernel Support for the Direct Rendering
- Infrastructure
- http://dri.sourceforge.net/doc/drm_low_level.html
-
- Hardware Locking for the Direct Rendering Infrastructure
- http://dri.sourceforge.net/doc/hardware_locking_low_level.html
-
- A Security Analysis of the Direct Rendering Infrastructure
- http://dri.sourceforge.net/doc/security_low_level.html
diff --git a/drivers/gpu/drm/adp/Kconfig b/drivers/gpu/drm/adp/Kconfig
new file mode 100644
index 000000000000..9fcc27eb200d
--- /dev/null
+++ b/drivers/gpu/drm/adp/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+config DRM_ADP
+ tristate "DRM Support for pre-DCP Apple display controllers"
+ depends on DRM && OF && ARM64
+ depends on ARCH_APPLE || COMPILE_TEST
+ select DRM_KMS_HELPER
+ select DRM_BRIDGE_CONNECTOR
+ select DRM_DISPLAY_HELPER
+ select DRM_KMS_DMA_HELPER
+ select DRM_GEM_DMA_HELPER
+ select DRM_PANEL_BRIDGE
+ select VIDEOMODE_HELPERS
+ select DRM_MIPI_DSI
+ help
+ Choose this option if you have an Apple Arm laptop with a touchbar.
+
+ If M is selected, this module will be called adpdrm.
diff --git a/drivers/gpu/drm/adp/Makefile b/drivers/gpu/drm/adp/Makefile
new file mode 100644
index 000000000000..8e7b618edd35
--- /dev/null
+++ b/drivers/gpu/drm/adp/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+adpdrm-y := adp_drv.o
+adpdrm-mipi-y := adp-mipi.o
+obj-$(CONFIG_DRM_ADP) += adpdrm.o adpdrm-mipi.o
diff --git a/drivers/gpu/drm/adp/adp-mipi.c b/drivers/gpu/drm/adp/adp-mipi.c
new file mode 100644
index 000000000000..cba7d32150a9
--- /dev/null
+++ b/drivers/gpu/drm/adp/adp-mipi.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/component.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_bridge.h>
+#include <drm/drm_mipi_dsi.h>
+
+#define DSI_GEN_HDR 0x6c
+#define DSI_GEN_PLD_DATA 0x70
+
+#define DSI_CMD_PKT_STATUS 0x74
+
+#define GEN_PLD_R_EMPTY BIT(4)
+#define GEN_PLD_W_FULL BIT(3)
+#define GEN_PLD_W_EMPTY BIT(2)
+#define GEN_CMD_FULL BIT(1)
+#define GEN_CMD_EMPTY BIT(0)
+#define GEN_RD_CMD_BUSY BIT(6)
+#define CMD_PKT_STATUS_TIMEOUT_US 20000
+
+struct adp_mipi_drv_private {
+ struct mipi_dsi_host dsi;
+ struct drm_bridge bridge;
+ struct drm_bridge *next_bridge;
+ void __iomem *mipi;
+};
+
+#define mipi_to_adp(x) container_of(x, struct adp_mipi_drv_private, dsi)
+
+static int adp_dsi_gen_pkt_hdr_write(struct adp_mipi_drv_private *adp, u32 hdr_val)
+{
+ int ret;
+ u32 val, mask;
+
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_CMD_FULL), 1000,
+ CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to get available command FIFO\n");
+ return ret;
+ }
+
+ writel(hdr_val, adp->mipi + DSI_GEN_HDR);
+
+ mask = GEN_CMD_EMPTY | GEN_PLD_W_EMPTY;
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, (val & mask) == mask,
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to write command FIFO\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adp_dsi_write(struct adp_mipi_drv_private *adp,
+ const struct mipi_dsi_packet *packet)
+{
+ const u8 *tx_buf = packet->payload;
+ int len = packet->payload_length, pld_data_bytes = sizeof(u32), ret;
+ __le32 word;
+ u32 val;
+
+ while (len) {
+ if (len < pld_data_bytes) {
+ word = 0;
+ memcpy(&word, tx_buf, len);
+ writel(le32_to_cpu(word), adp->mipi + DSI_GEN_PLD_DATA);
+ len = 0;
+ } else {
+ memcpy(&word, tx_buf, pld_data_bytes);
+ writel(le32_to_cpu(word), adp->mipi + DSI_GEN_PLD_DATA);
+ tx_buf += pld_data_bytes;
+ len -= pld_data_bytes;
+ }
+
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_PLD_W_FULL), 1000,
+ CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev,
+ "failed to get available write payload FIFO\n");
+ return ret;
+ }
+ }
+
+ word = 0;
+ memcpy(&word, packet->header, sizeof(packet->header));
+ return adp_dsi_gen_pkt_hdr_write(adp, le32_to_cpu(word));
+}
+
+static int adp_dsi_read(struct adp_mipi_drv_private *adp,
+ const struct mipi_dsi_msg *msg)
+{
+ int i, j, ret, len = msg->rx_len;
+ u8 *buf = msg->rx_buf;
+ u32 val;
+
+ /* Wait for the end of the read operation */
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_RD_CMD_BUSY),
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "Timeout during read operation\n");
+ return ret;
+ }
+
+ for (i = 0; i < len; i += 4) {
+ /* The read FIFO must not be empty before all bytes are read */
+ ret = readl_poll_timeout(adp->mipi + DSI_CMD_PKT_STATUS,
+ val, !(val & GEN_PLD_R_EMPTY),
+ 1000, CMD_PKT_STATUS_TIMEOUT_US);
+ if (ret) {
+ dev_err(adp->dsi.dev, "Read payload FIFO is empty\n");
+ return ret;
+ }
+
+ val = readl(adp->mipi + DSI_GEN_PLD_DATA);
+ for (j = 0; j < 4 && j + i < len; j++)
+ buf[i + j] = val >> (8 * j);
+ }
+
+ return ret;
+}
+
+static ssize_t adp_dsi_host_transfer(struct mipi_dsi_host *host,
+ const struct mipi_dsi_msg *msg)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+ struct mipi_dsi_packet packet;
+ int ret, nb_bytes;
+
+ ret = mipi_dsi_create_packet(&packet, msg);
+ if (ret) {
+ dev_err(adp->dsi.dev, "failed to create packet: %d\n", ret);
+ return ret;
+ }
+
+ ret = adp_dsi_write(adp, &packet);
+ if (ret)
+ return ret;
+
+ if (msg->rx_buf && msg->rx_len) {
+ ret = adp_dsi_read(adp, msg);
+ if (ret)
+ return ret;
+ nb_bytes = msg->rx_len;
+ } else {
+ nb_bytes = packet.size;
+ }
+
+ return nb_bytes;
+}
+
+static int adp_dsi_bind(struct device *dev, struct device *master, void *data)
+{
+ return 0;
+}
+
+static void adp_dsi_unbind(struct device *dev, struct device *master, void *data)
+{
+}
+
+static const struct component_ops adp_dsi_component_ops = {
+ .bind = adp_dsi_bind,
+ .unbind = adp_dsi_unbind,
+};
+
+static int adp_dsi_host_attach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *dev)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+ struct drm_bridge *next;
+ int ret;
+
+ next = devm_drm_of_get_bridge(adp->dsi.dev, adp->dsi.dev->of_node, 1, 0);
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+
+ adp->next_bridge = next;
+
+ drm_bridge_add(&adp->bridge);
+
+ ret = component_add(host->dev, &adp_dsi_component_ops);
+ if (ret) {
+ pr_err("failed to add dsi_host component: %d\n", ret);
+ drm_bridge_remove(&adp->bridge);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adp_dsi_host_detach(struct mipi_dsi_host *host,
+ struct mipi_dsi_device *dev)
+{
+ struct adp_mipi_drv_private *adp = mipi_to_adp(host);
+
+ component_del(host->dev, &adp_dsi_component_ops);
+ drm_bridge_remove(&adp->bridge);
+ return 0;
+}
+
+static const struct mipi_dsi_host_ops adp_dsi_host_ops = {
+ .transfer = adp_dsi_host_transfer,
+ .attach = adp_dsi_host_attach,
+ .detach = adp_dsi_host_detach,
+};
+
+static int adp_dsi_bridge_attach(struct drm_bridge *bridge,
+ struct drm_encoder *encoder,
+ enum drm_bridge_attach_flags flags)
+{
+ struct adp_mipi_drv_private *adp =
+ container_of(bridge, struct adp_mipi_drv_private, bridge);
+
+ return drm_bridge_attach(encoder, adp->next_bridge, bridge, flags);
+}
+
+static const struct drm_bridge_funcs adp_dsi_bridge_funcs = {
+ .attach = adp_dsi_bridge_attach,
+};
+
+static int adp_mipi_probe(struct platform_device *pdev)
+{
+ struct adp_mipi_drv_private *adp;
+
+ adp = devm_drm_bridge_alloc(&pdev->dev, struct adp_mipi_drv_private,
+ bridge, &adp_dsi_bridge_funcs);
+ if (IS_ERR(adp))
+ return PTR_ERR(adp);
+
+ adp->mipi = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(adp->mipi)) {
+ dev_err(&pdev->dev, "failed to map mipi mmio");
+ return PTR_ERR(adp->mipi);
+ }
+
+ adp->dsi.dev = &pdev->dev;
+ adp->dsi.ops = &adp_dsi_host_ops;
+ adp->bridge.of_node = pdev->dev.of_node;
+ adp->bridge.type = DRM_MODE_CONNECTOR_DSI;
+ dev_set_drvdata(&pdev->dev, adp);
+ return mipi_dsi_host_register(&adp->dsi);
+}
+
+static void adp_mipi_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct adp_mipi_drv_private *adp = dev_get_drvdata(dev);
+
+ mipi_dsi_host_unregister(&adp->dsi);
+}
+
+static const struct of_device_id adp_mipi_of_match[] = {
+ { .compatible = "apple,h7-display-pipe-mipi", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, adp_mipi_of_match);
+
+static struct platform_driver adp_mipi_platform_driver = {
+ .driver = {
+ .name = "adp-mipi",
+ .of_match_table = adp_mipi_of_match,
+ },
+ .probe = adp_mipi_probe,
+ .remove = adp_mipi_remove,
+};
+
+module_platform_driver(adp_mipi_platform_driver);
+
+MODULE_DESCRIPTION("Apple Display Pipe MIPI driver");
+MODULE_LICENSE("GPL");
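
For context on the transfer path above: a panel driver sitting on this host never touches the FIFO registers itself. The standard DCS helpers from drm_mipi_dsi.h build a mipi_dsi_msg and hand it to adp_dsi_host_transfer() through the host ops. A hedged sketch of such a caller (foo_panel_enable is hypothetical):

#include <drm/drm_mipi_dsi.h>
#include <video/mipi_display.h>

static int foo_panel_enable(struct mipi_dsi_device *dsi)
{
        static const u8 exit_sleep[] = { MIPI_DCS_EXIT_SLEEP_MODE };
        ssize_t ret;

        /* Ends up in adp_dsi_host_transfer() via mipi_dsi_create_packet(). */
        ret = mipi_dsi_dcs_write_buffer(dsi, exit_sleep, sizeof(exit_sleep));
        if (ret < 0)
                return ret;

        /* Also a DCS write underneath, routed through the same host op. */
        return mipi_dsi_dcs_set_display_on(dsi);
}
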
diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c
new file mode 100644
index 000000000000..4554cf75565e
--- /dev/null
+++ b/drivers/gpu/drm/adp/adp_drv.c
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/component.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_bridge_connector.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_dma_helper.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_atomic_helper.h>
+#include <drm/drm_gem_dma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_of.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+
+#define ADP_INT_STATUS 0x34
+#define ADP_INT_STATUS_INT_MASK 0x7
+#define ADP_INT_STATUS_VBLANK 0x1
+#define ADP_CTRL 0x100
+#define ADP_CTRL_VBLANK_ON 0x12
+#define ADP_CTRL_FIFO_ON 0x601
+#define ADP_SCREEN_SIZE 0x0c
+#define ADP_SCREEN_HSIZE GENMASK(15, 0)
+#define ADP_SCREEN_VSIZE GENMASK(31, 16)
+
+#define ADBE_FIFO 0x10c0
+#define ADBE_FIFO_SYNC 0xc0000000
+
+#define ADBE_BLEND_BYPASS 0x2020
+#define ADBE_BLEND_EN1 0x2028
+#define ADBE_BLEND_EN2 0x2074
+#define ADBE_BLEND_EN3 0x202c
+#define ADBE_BLEND_EN4 0x2034
+#define ADBE_MASK_BUF 0x2200
+
+#define ADBE_SRC_START 0x4040
+#define ADBE_SRC_SIZE 0x4048
+#define ADBE_DST_START 0x4050
+#define ADBE_DST_SIZE 0x4054
+#define ADBE_STRIDE 0x4038
+#define ADBE_FB_BASE 0x4030
+
+#define ADBE_LAYER_EN1 0x4020
+#define ADBE_LAYER_EN2 0x4068
+#define ADBE_LAYER_EN3 0x40b4
+#define ADBE_LAYER_EN4 0x40f4
+#define ADBE_SCALE_CTL 0x40ac
+#define ADBE_SCALE_CTL_BYPASS 0x100000
+
+#define ADBE_LAYER_CTL 0x1038
+#define ADBE_LAYER_CTL_ENABLE 0x10000
+
+#define ADBE_PIX_FMT 0x402c
+#define ADBE_PIX_FMT_XRGB32 0x53e4001
+
+static int adp_open(struct inode *inode, struct file *filp)
+{
+ /*
+ * The modesetting driver does not check the non-desktop connector
+ * property and keeps the device open and locked. If the touchbar daemon
+ * opens the device first, modesetting breaks the whole X session.
+ * Simply refuse to open the device for X11 server processes as
+ * a workaround.
+ */
+ if (current->comm[0] == 'X')
+ return -EBUSY;
+
+ return drm_open(inode, filp);
+}
+
+static const struct file_operations adp_fops = {
+ .owner = THIS_MODULE,
+ .open = adp_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .compat_ioctl = drm_compat_ioctl,
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = noop_llseek,
+ .mmap = drm_gem_mmap,
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+ DRM_GEM_DMA_UNMAPPED_AREA_FOPS
+};
+
+static int adp_drm_gem_dumb_create(struct drm_file *file_priv,
+ struct drm_device *drm,
+ struct drm_mode_create_dumb *args)
+{
+ args->height = ALIGN(args->height, 64);
+ args->size = args->pitch * args->height;
+
+ return drm_gem_dma_dumb_create_internal(file_priv, drm, args);
+}
+
+static const struct drm_driver adp_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
+ .fops = &adp_fops,
+ DRM_GEM_DMA_DRIVER_OPS_VMAP_WITH_DUMB_CREATE(adp_drm_gem_dumb_create),
+ .name = "adp",
+ .desc = "Apple Display Pipe DRM Driver",
+ .major = 0,
+ .minor = 1,
+};
+
+struct adp_drv_private {
+ struct drm_device drm;
+ struct drm_crtc crtc;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ struct drm_bridge *next_bridge;
+ void __iomem *be;
+ void __iomem *fe;
+ u32 *mask_buf;
+ u64 mask_buf_size;
+ dma_addr_t mask_iova;
+ int be_irq;
+ int fe_irq;
+ struct drm_pending_vblank_event *event;
+};
+
+#define to_adp(x) container_of(x, struct adp_drv_private, drm)
+#define crtc_to_adp(x) container_of(x, struct adp_drv_private, crtc)
+
+static int adp_plane_atomic_check(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_plane_state;
+ struct drm_crtc_state *crtc_state;
+
+ new_plane_state = drm_atomic_get_new_plane_state(state, plane);
+
+ if (!new_plane_state->crtc)
+ return 0;
+
+ crtc_state = drm_atomic_get_crtc_state(state, new_plane_state->crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ return drm_atomic_helper_check_plane_state(new_plane_state,
+ crtc_state,
+ DRM_PLANE_NO_SCALING,
+ DRM_PLANE_NO_SCALING,
+ true, true);
+}
+
+static void adp_plane_atomic_update(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp;
+ struct drm_rect src_rect;
+ struct drm_gem_dma_object *obj;
+ struct drm_framebuffer *fb;
+ struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+ u32 src_pos, src_size, dst_pos, dst_size;
+
+ if (!plane || !new_state)
+ return;
+
+ fb = new_state->fb;
+ if (!fb)
+ return;
+ adp = to_adp(plane->dev);
+
+ drm_rect_fp_to_int(&src_rect, &new_state->src);
+ src_pos = src_rect.x1 << 16 | src_rect.y1;
+ dst_pos = new_state->dst.x1 << 16 | new_state->dst.y1;
+ src_size = drm_rect_width(&src_rect) << 16 | drm_rect_height(&src_rect);
+ dst_size = drm_rect_width(&new_state->dst) << 16 |
+ drm_rect_height(&new_state->dst);
+ writel(src_pos, adp->be + ADBE_SRC_START);
+ writel(src_size, adp->be + ADBE_SRC_SIZE);
+ writel(dst_pos, adp->be + ADBE_DST_START);
+ writel(dst_size, adp->be + ADBE_DST_SIZE);
+ writel(fb->pitches[0], adp->be + ADBE_STRIDE);
+ obj = drm_fb_dma_get_gem_obj(fb, 0);
+ if (obj)
+ writel(obj->dma_addr + fb->offsets[0], adp->be + ADBE_FB_BASE);
+
+ writel(BIT(0), adp->be + ADBE_LAYER_EN1);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN2);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN3);
+ writel(BIT(0), adp->be + ADBE_LAYER_EN4);
+ writel(ADBE_SCALE_CTL_BYPASS, adp->be + ADBE_SCALE_CTL);
+ writel(ADBE_LAYER_CTL_ENABLE | BIT(0), adp->be + ADBE_LAYER_CTL);
+ writel(ADBE_PIX_FMT_XRGB32, adp->be + ADBE_PIX_FMT);
+}
+
+static void adp_plane_atomic_disable(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = to_adp(plane->dev);
+
+ writel(0x0, adp->be + ADBE_LAYER_EN1);
+ writel(0x0, adp->be + ADBE_LAYER_EN2);
+ writel(0x0, adp->be + ADBE_LAYER_EN3);
+ writel(0x0, adp->be + ADBE_LAYER_EN4);
+ writel(ADBE_LAYER_CTL_ENABLE, adp->be + ADBE_LAYER_CTL);
+}
+
+static const struct drm_plane_helper_funcs adp_plane_helper_funcs = {
+ .atomic_check = adp_plane_atomic_check,
+ .atomic_update = adp_plane_atomic_update,
+ .atomic_disable = adp_plane_atomic_disable,
+ DRM_GEM_SHADOW_PLANE_HELPER_FUNCS
+};
+
+static const struct drm_plane_funcs adp_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ DRM_GEM_SHADOW_PLANE_FUNCS
+};
+
+static const u32 plane_formats[] = {
+ DRM_FORMAT_XRGB8888,
+};
+
+#define ALL_CRTCS 1
+
+static struct drm_plane *adp_plane_new(struct adp_drv_private *adp)
+{
+ struct drm_device *drm = &adp->drm;
+ struct drm_plane *plane;
+
+ plane = __drmm_universal_plane_alloc(drm, sizeof(struct drm_plane), 0,
+ ALL_CRTCS, &adp_plane_funcs,
+ plane_formats, ARRAY_SIZE(plane_formats),
+ NULL, DRM_PLANE_TYPE_PRIMARY, "plane");
+ if (IS_ERR(plane)) {
+ drm_err(drm, "failed to allocate plane");
+ return plane;
+ }
+
+ drm_plane_helper_add(plane, &adp_plane_helper_funcs);
+ return plane;
+}
+
+static void adp_enable_vblank(struct adp_drv_private *adp)
+{
+ u32 cur_ctrl;
+
+ writel(ADP_INT_STATUS_INT_MASK, adp->fe + ADP_INT_STATUS);
+
+ cur_ctrl = readl(adp->fe + ADP_CTRL);
+ writel(cur_ctrl | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL);
+}
+
+static int adp_crtc_enable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct adp_drv_private *adp = to_adp(dev);
+
+ adp_enable_vblank(adp);
+
+ return 0;
+}
+
+static void adp_disable_vblank(struct adp_drv_private *adp)
+{
+ u32 cur_ctrl;
+
+ cur_ctrl = readl(adp->fe + ADP_CTRL);
+ writel(cur_ctrl & ~ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL);
+ writel(ADP_INT_STATUS_INT_MASK, adp->fe + ADP_INT_STATUS);
+}
+
+static void adp_crtc_disable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct adp_drv_private *adp = to_adp(dev);
+
+ adp_disable_vblank(adp);
+}
+
+static void adp_crtc_atomic_enable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+
+ writel(BIT(0), adp->be + ADBE_BLEND_EN2);
+ writel(BIT(4), adp->be + ADBE_BLEND_EN1);
+ writel(BIT(0), adp->be + ADBE_BLEND_EN3);
+ writel(BIT(0), adp->be + ADBE_BLEND_BYPASS);
+ writel(BIT(0), adp->be + ADBE_BLEND_EN4);
+ drm_crtc_vblank_on(crtc);
+}
+
+static void adp_crtc_atomic_disable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+ struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state, crtc);
+
+ drm_atomic_helper_disable_planes_on_crtc(old_state, false);
+
+ writel(0x0, adp->be + ADBE_BLEND_EN2);
+ writel(0x0, adp->be + ADBE_BLEND_EN1);
+ writel(0x0, adp->be + ADBE_BLEND_EN3);
+ writel(0x0, adp->be + ADBE_BLEND_BYPASS);
+ writel(0x0, adp->be + ADBE_BLEND_EN4);
+ drm_crtc_vblank_off(crtc);
+}
+
+static void adp_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ u32 frame_num = 1;
+ unsigned long flags;
+ struct adp_drv_private *adp = crtc_to_adp(crtc);
+ struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc);
+ u64 new_size = ALIGN(new_state->mode.hdisplay *
+ new_state->mode.vdisplay * 4, PAGE_SIZE);
+
+ if (new_size != adp->mask_buf_size) {
+ if (adp->mask_buf)
+ dma_free_coherent(crtc->dev->dev, adp->mask_buf_size,
+ adp->mask_buf, adp->mask_iova);
+ adp->mask_buf = NULL;
+ if (new_size != 0) {
+ adp->mask_buf = dma_alloc_coherent(crtc->dev->dev, new_size,
+ &adp->mask_iova, GFP_KERNEL);
+ memset(adp->mask_buf, 0xFF, new_size);
+ writel(adp->mask_iova, adp->be + ADBE_MASK_BUF);
+ }
+ adp->mask_buf_size = new_size;
+ }
+ writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO);
+ //FIXME: use adbe flush interrupt
+ if (crtc->state->event) {
+ struct drm_pending_vblank_event *event = crtc->state->event;
+
+ crtc->state->event = NULL;
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ if (drm_crtc_vblank_get(crtc) != 0)
+ drm_crtc_send_vblank_event(crtc, event);
+ else
+ adp->event = event;
+
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ }
+}
+
+static const struct drm_crtc_funcs adp_crtc_funcs = {
+ .destroy = drm_crtc_cleanup,
+ .set_config = drm_atomic_helper_set_config,
+ .page_flip = drm_atomic_helper_page_flip,
+ .reset = drm_atomic_helper_crtc_reset,
+ .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+ .enable_vblank = adp_crtc_enable_vblank,
+ .disable_vblank = adp_crtc_disable_vblank,
+};
+
+
+static const struct drm_crtc_helper_funcs adp_crtc_helper_funcs = {
+ .atomic_enable = adp_crtc_atomic_enable,
+ .atomic_disable = adp_crtc_atomic_disable,
+ .atomic_flush = adp_crtc_atomic_flush,
+};
+
+static int adp_setup_crtc(struct adp_drv_private *adp)
+{
+ struct drm_device *drm = &adp->drm;
+ struct drm_plane *primary;
+ int ret;
+
+ primary = adp_plane_new(adp);
+ if (IS_ERR(primary))
+ return PTR_ERR(primary);
+
+ ret = drm_crtc_init_with_planes(drm, &adp->crtc, primary,
+ NULL, &adp_crtc_funcs, NULL);
+ if (ret)
+ return ret;
+
+ drm_crtc_helper_add(&adp->crtc, &adp_crtc_helper_funcs);
+ return 0;
+}
+
+static const struct drm_mode_config_funcs adp_mode_config_funcs = {
+ .fb_create = drm_gem_fb_create_with_dirty,
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = drm_atomic_helper_commit,
+};
+
+static int adp_setup_mode_config(struct adp_drv_private *adp)
+{
+ struct drm_device *drm = &adp->drm;
+ int ret;
+ u32 size;
+
+ ret = drmm_mode_config_init(drm);
+ if (ret)
+ return ret;
+
+ /*
+ * Query the screen size and restrict the frame buffer size to the screen
+ * size aligned to the next multiple of 64. This is not necessary but can
+ * be used as a simple check for non-desktop devices.
+ * Xorg's modesetting driver does not care about the connector
+ * "non-desktop" property. The max frame buffer width or height can be
+ * easily checked and a device can be rejected if the max width/height is
+ * smaller than 120, for example.
+ * A touchbar daemon is not limited by this small framebuffer size.
+ */
+ size = readl(adp->fe + ADP_SCREEN_SIZE);
+
+ drm->mode_config.min_width = 32;
+ drm->mode_config.min_height = 32;
+ drm->mode_config.max_width = ALIGN(FIELD_GET(ADP_SCREEN_HSIZE, size), 64);
+ drm->mode_config.max_height = ALIGN(FIELD_GET(ADP_SCREEN_VSIZE, size), 64);
+ drm->mode_config.preferred_depth = 24;
+ drm->mode_config.prefer_shadow = 0;
+ drm->mode_config.funcs = &adp_mode_config_funcs;
+
+ ret = adp_setup_crtc(adp);
+ if (ret) {
+ drm_err(drm, "failed to create crtc");
+ return ret;
+ }
+
+ adp->encoder = drmm_plain_encoder_alloc(drm, NULL, DRM_MODE_ENCODER_DSI, NULL);
+ if (IS_ERR(adp->encoder)) {
+ drm_err(drm, "failed to init encoder");
+ return PTR_ERR(adp->encoder);
+ }
+ adp->encoder->possible_crtcs = ALL_CRTCS;
+
+ ret = drm_bridge_attach(adp->encoder, adp->next_bridge, NULL,
+ DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+ if (ret) {
+ drm_err(drm, "failed to init bridge chain");
+ return ret;
+ }
+
+ adp->connector = drm_bridge_connector_init(drm, adp->encoder);
+ if (IS_ERR(adp->connector))
+ return PTR_ERR(adp->connector);
+
+ drm_connector_attach_encoder(adp->connector, adp->encoder);
+
+ ret = drm_vblank_init(drm, drm->mode_config.num_crtc);
+ if (ret < 0) {
+ drm_err(drm, "failed to initialize vblank");
+ return ret;
+ }
+
+ drm_mode_config_reset(drm);
+
+ return 0;
+}
+
+static int adp_parse_of(struct platform_device *pdev, struct adp_drv_private *adp)
+{
+ struct device *dev = &pdev->dev;
+
+ adp->be = devm_platform_ioremap_resource_byname(pdev, "be");
+ if (IS_ERR(adp->be)) {
+ dev_err(dev, "failed to map display backend mmio");
+ return PTR_ERR(adp->be);
+ }
+
+ adp->fe = devm_platform_ioremap_resource_byname(pdev, "fe");
+ if (IS_ERR(adp->fe)) {
+ dev_err(dev, "failed to map display pipe mmio");
+ return PTR_ERR(adp->fe);
+ }
+
+ adp->be_irq = platform_get_irq_byname(pdev, "be");
+ if (adp->be_irq < 0)
+ return adp->be_irq;
+
+ adp->fe_irq = platform_get_irq_byname(pdev, "fe");
+ if (adp->fe_irq < 0)
+ return adp->fe_irq;
+
+ return 0;
+}
+
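+/*
+ * Display frontend interrupt handler: on a vblank interrupt, signal DRM
+ * vblank and, if a page flip event is pending and ADP_CTRL reports the
+ * expected pipe state, deliver it. Handled status bits are acknowledged by
+ * writing them back to ADP_INT_STATUS.
+ */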
+static irqreturn_t adp_fe_irq(int irq, void *arg)
+{
+ struct adp_drv_private *adp = (struct adp_drv_private *)arg;
+ u32 int_status;
+ u32 int_ctl;
+
+ int_status = readl(adp->fe + ADP_INT_STATUS);
+ if (int_status & ADP_INT_STATUS_VBLANK) {
+ drm_crtc_handle_vblank(&adp->crtc);
+ spin_lock(&adp->crtc.dev->event_lock);
+ if (adp->event) {
+ int_ctl = readl(adp->fe + ADP_CTRL);
+ if ((int_ctl & 0xF00) == 0x600) {
+ drm_crtc_send_vblank_event(&adp->crtc, adp->event);
+ adp->event = NULL;
+ drm_crtc_vblank_put(&adp->crtc);
+ }
+ }
+ spin_unlock(&adp->crtc.dev->event_lock);
+ }
+
+ writel(int_status, adp->fe + ADP_INT_STATUS);
+
+ return IRQ_HANDLED;
+}
+
+static int adp_drm_bind(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct adp_drv_private *adp = to_adp(drm);
+ int err;
+
+ writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL);
+
+ adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0);
+ if (IS_ERR(adp->next_bridge)) {
+ dev_err(dev, "failed to find next bridge");
+ return PTR_ERR(adp->next_bridge);
+ }
+
+ err = adp_setup_mode_config(adp);
+ if (err < 0)
+ return err;
+
+ err = request_irq(adp->fe_irq, adp_fe_irq, 0, "adp-fe", adp);
+ if (err)
+ return err;
+
+ err = drm_dev_register(&adp->drm, 0);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void adp_drm_unbind(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct adp_drv_private *adp = to_adp(drm);
+
+ drm_dev_unregister(drm);
+ drm_atomic_helper_shutdown(drm);
+ free_irq(adp->fe_irq, adp);
+}
+
+static const struct component_master_ops adp_master_ops = {
+ .bind = adp_drm_bind,
+ .unbind = adp_drm_unbind,
+};
+
+static int compare_dev(struct device *dev, void *data)
+{
+ return dev->of_node == data;
+}
+
+static int adp_probe(struct platform_device *pdev)
+{
+ struct device_node *port;
+ struct component_match *match = NULL;
+ struct adp_drv_private *adp;
+ int err;
+
+ adp = devm_drm_dev_alloc(&pdev->dev, &adp_driver, struct adp_drv_private, drm);
+ if (IS_ERR(adp))
+ return PTR_ERR(adp);
+
+ dev_set_drvdata(&pdev->dev, &adp->drm);
+
+ err = adp_parse_of(pdev, adp);
+ if (err < 0)
+ return err;
+
+ port = of_graph_get_remote_node(pdev->dev.of_node, 0, 0);
+ if (!port)
+ return -ENODEV;
+
+ drm_of_component_match_add(&pdev->dev, &match, compare_dev, port);
+ of_node_put(port);
+
+ return component_master_add_with_match(&pdev->dev, &adp_master_ops, match);
+}
+
+static void adp_remove(struct platform_device *pdev)
+{
+ component_master_del(&pdev->dev, &adp_master_ops);
+ dev_set_drvdata(&pdev->dev, NULL);
+}
+
+static const struct of_device_id adp_of_match[] = {
+ { .compatible = "apple,h7-display-pipe", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, adp_of_match);
+
+static struct platform_driver adp_platform_driver = {
+ .driver = {
+ .name = "adp",
+ .of_match_table = adp_of_match,
+ },
+ .probe = adp_probe,
+ .remove = adp_remove,
+};
+
+module_platform_driver(adp_platform_driver);
+
+MODULE_DESCRIPTION("Apple Display Pipe DRM driver");
+MODULE_LICENSE("GPL");
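
The "non-desktop" heuristic suggested in the adp_setup_mode_config() comment
above can be implemented with plain libdrm: drmModeGetResources() exposes the
min/max framebuffer limits this driver programs, so a compositor can skip the
touchbar-sized device without consulting connector properties. A minimal
sketch (the helper name and the 120-pixel threshold follow the driver comment
and are illustrative, not part of this patch):

#include <stdbool.h>
#include <xf86drmMode.h>

/*
 * Return true if the device's maximum framebuffer size looks like a
 * touchbar-class display rather than a desktop screen.
 */
static bool is_tiny_display(int drm_fd)
{
	drmModeRes *res = drmModeGetResources(drm_fd);
	bool tiny;

	if (!res)
		return false;	/* cannot tell; treat it as a normal device */

	tiny = res->max_width < 120 || res->max_height < 120;
	drmModeFreeResources(res);
	return tiny;
}

A touchbar daemon, as the comment notes, is unaffected by these small limits
and can keep using the device directly.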
diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
new file mode 100644
index 000000000000..216d932a7831
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: MIT
+menu "ACP (Audio CoProcessor) Configuration"
+ depends on DRM_AMDGPU
+
+config DRM_AMD_ACP
+ bool "Enable AMD Audio CoProcessor IP support"
+ depends on DRM_AMDGPU
+ select MFD_CORE
+ select PM_GENERIC_DOMAINS if PM
+ help
+ Choose this option to enable ACP IP support for AMD SOCs.
+ This adds the ACP (Audio CoProcessor) IP driver and wires
+ it up into the amdgpu driver. The ACP block provides the DMA
+ engine for the i2s-based ALSA driver. It is required for audio
+ on APUs which utilize an i2s codec.
+
+endmenu
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
new file mode 100644
index 000000000000..d4176a3fb706
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -0,0 +1,27 @@
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+# Makefile for the ACP, which is a sub-component
+# of the AMDSOC/AMDGPU drm driver.
+# It provides the HW control for ACP-related functionality.
+
+AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
diff --git a/drivers/gpu/drm/amd/acp/acp_hw.c b/drivers/gpu/drm/amd/acp/acp_hw.c
new file mode 100644
index 000000000000..c7d7205c9b11
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/acp_hw.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+
+#include "acp_gfx_if.h"
+
+#define ACP_MODE_I2S 0
+#define ACP_MODE_AZ 1
+
+#define mmACP_AZALIA_I2S_SELECT 0x51d4
+
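+/*
+ * Probe-time check: ACP 2.2 can be routed to either I2S or Azalia, so only
+ * report success (and expose the ACP device) when the block is in I2S mode.
+ * Other ACP versions are assumed to be in I2S mode here.
+ */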
+int amd_acp_hw_init(struct cgs_device *cgs_device,
+ unsigned acp_version_major, unsigned acp_version_minor)
+{
+ unsigned int acp_mode = ACP_MODE_I2S;
+
+ if ((acp_version_major == 2) && (acp_version_minor == 2))
+ acp_mode = cgs_read_register(cgs_device,
+ mmACP_AZALIA_I2S_SELECT);
+
+ if (acp_mode != ACP_MODE_I2S)
+ return -ENODEV;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
new file mode 100644
index 000000000000..b26710cae801
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _ACP_GFX_IF_H
+#define _ACP_GFX_IF_H
+
+#include <linux/types.h>
+#include "cgs_common.h"
+
+int amd_acp_hw_init(struct cgs_device *cgs_device,
+ unsigned acp_version_major, unsigned acp_version_minor);
+
+#endif /* _ACP_GFX_IF_H */
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
new file mode 100644
index 000000000000..7f515be5185d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -0,0 +1,108 @@
+# SPDX-License-Identifier: MIT
+
+config DRM_AMDGPU
+ tristate "AMD GPU"
+ depends on DRM && PCI
+ depends on !UML
+ select FW_LOADER
+ select DRM_CLIENT
+ select DRM_CLIENT_SELECTION
+ select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_DSC_HELPER
+ select DRM_DISPLAY_HDMI_HELPER
+ select DRM_DISPLAY_HDCP_HELPER
+ select DRM_DISPLAY_HELPER
+ select DRM_KMS_HELPER
+ select DRM_SCHED
+ select DRM_TTM
+ select DRM_TTM_HELPER
+ select POWER_SUPPLY
+ select HWMON
+ select I2C
+ select I2C_ALGOBIT
+ select CRC16
+ select BACKLIGHT_CLASS_DEVICE
+ select INTERVAL_TREE
+ select DRM_BUDDY
+ select DRM_SUBALLOC_HELPER
+ select DRM_EXEC
+ select DRM_PANEL_BACKLIGHT_QUIRKS
+ # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
+ # ACPI_VIDEO's dependencies must also be selected.
+ select INPUT if ACPI
+ select ACPI_VIDEO if ACPI
+ # On x86 ACPI_VIDEO also needs ACPI_WMI
+ select X86_PLATFORM_DEVICES if ACPI && X86
+ select ACPI_WMI if ACPI && X86
+ help
+ Choose this option if you have a recent AMD Radeon graphics card.
+
+ If M is selected, the module will be called amdgpu.
+
+config DRM_AMDGPU_SI
+ bool "Enable amdgpu support for SI parts"
+ depends on DRM_AMDGPU
+ help
+ Choose this option if you want to enable support
+ for SI (Southern Islands) asics.
+
+ SI (Southern Islands) are first generation GCN GPUs,
+ supported by both drivers: radeon (old) and amdgpu (new).
+ By default, SI dedicated GPUs are supported by amdgpu.
+
+ Use module options to override this:
+ To use radeon for SI,
+ radeon.si_support=1 amdgpu.si_support=0
+
+config DRM_AMDGPU_CIK
+ bool "Enable amdgpu support for CIK parts"
+ depends on DRM_AMDGPU
+ help
+ Choose this option if you want to enable support for CIK (Sea
+ Islands) asics.
+
+ CIK (Sea Islands) are second generation GCN GPUs,
+ supported by both drivers: radeon (old) and amdgpu (new).
+ By default, CIK dedicated GPUs are supported by amdgpu and
+ CIK APUs are supported by radeon.
+
+ Use module options to override this:
+ To use amdgpu for CIK,
+ radeon.cik_support=0 amdgpu.cik_support=1
+ To use radeon for CIK,
+ radeon.cik_support=1 amdgpu.cik_support=0
+
+config DRM_AMDGPU_USERPTR
+ bool "Always enable userptr write support"
+ depends on DRM_AMDGPU
+ select HMM_MIRROR
+ select MMU_NOTIFIER
+ help
+ This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if they
+ aren't already selected, to enable full userptr support.
+
+config DRM_AMD_ISP
+ bool "Enable AMD Image Signal Processor IP support"
+ depends on DRM_AMDGPU && ACPI
+ select MFD_CORE
+ select PM_GENERIC_DOMAINS if PM
+ help
+ Choose this option to enable ISP IP support for AMD SOCs.
+ This adds the ISP (Image Signal Processor) IP driver and wires
+ it up into the amdgpu driver. It is required for camera
+ support on APUs which utilize MIPI cameras.
+
+config DRM_AMDGPU_WERROR
+ bool "Force the compiler to throw an error instead of a warning when compiling"
+ depends on DRM_AMDGPU
+ depends on EXPERT
+ depends on !COMPILE_TEST
+ default n
+ help
+ Add -Werror to the build flags for amdgpu.ko.
+ Only enable this if you are working on fixing compiler warnings in amdgpu.ko.
+
+source "drivers/gpu/drm/amd/acp/Kconfig"
+source "drivers/gpu/drm/amd/display/Kconfig"
+source "drivers/gpu/drm/amd/amdkfd/Kconfig"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
new file mode 100644
index 000000000000..c88760fb52ea
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -0,0 +1,333 @@
+#
+# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+# Makefile for the drm device driver. This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+FULL_AMD_PATH=$(src)/..
+DISPLAY_FOLDER_NAME=display
+FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
+
+ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
+ -I$(FULL_AMD_PATH)/include \
+ -I$(FULL_AMD_PATH)/amdgpu \
+ -I$(FULL_AMD_PATH)/pm/inc \
+ -I$(FULL_AMD_PATH)/acp/include \
+ -I$(FULL_AMD_DISPLAY_PATH) \
+ -I$(FULL_AMD_DISPLAY_PATH)/include \
+ -I$(FULL_AMD_DISPLAY_PATH)/modules/inc \
+ -I$(FULL_AMD_DISPLAY_PATH)/dc \
+ -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
+ -I$(FULL_AMD_PATH)/amdkfd \
+ -I$(FULL_AMD_PATH)/ras/ras_mgr
+
+# Locally disable W=1 warnings enabled in drm subsystem Makefile
+subdir-ccflags-y += -Wno-override-init
+subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
+
+amdgpu-y := amdgpu_drv.o
+
+# add KMS driver
+amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
+ amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
+ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
+ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
+ amdgpu_gem.o amdgpu_ring.o \
+ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
+ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
+ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+ amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
+ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
+ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
+ amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o amdgpu_hdp.o \
+ amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
+ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o \
+ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
+ amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o
+
+amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
+
+amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
+
+# add asic specific block
+amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o \
+ dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
+
+amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o \
+ uvd_v3_1.o vce_v1_0.o
+
+amdgpu-y += \
+ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
+ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
+ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
+ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
+ nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \
+ cyan_skillfish_reg_init.o
+
+# add DF block
+amdgpu-y += \
+ df_v1_7.o \
+ df_v3_6.o \
+ df_v4_3.o \
+ df_v4_6_2.o \
+ df_v4_15.o
+
+# add GMC block
+amdgpu-y += \
+ gmc_v7_0.o \
+ gmc_v8_0.o \
+ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
+ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
+ mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
+ mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
+ gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o
+
+# add UMC block
+amdgpu-y += \
+ umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o
+
+# add IH block
+amdgpu-y += \
+ amdgpu_irq.o \
+ amdgpu_ih.o \
+ iceland_ih.o \
+ tonga_ih.o \
+ cz_ih.o \
+ vega10_ih.o \
+ vega20_ih.o \
+ navi10_ih.o \
+ ih_v6_0.o \
+ ih_v6_1.o \
+ ih_v7_0.o
+
+# add PSP block
+amdgpu-y += \
+ amdgpu_psp.o \
+ psp_v3_1.o \
+ psp_v10_0.o \
+ psp_v11_0.o \
+ psp_v11_0_8.o \
+ psp_v12_0.o \
+ psp_v13_0.o \
+ psp_v13_0_4.o \
+ psp_v14_0.o
+
+# add DCE block
+amdgpu-y += \
+ dce_v10_0.o \
+ amdgpu_vkms.o
+
+# add GFX block
+amdgpu-y += \
+ amdgpu_gfx.o \
+ amdgpu_rlc.o \
+ gfx_v8_0.o \
+ gfx_v9_0.o \
+ gfx_v9_4.o \
+ gfx_v9_4_2.o \
+ gfx_v9_4_3.o \
+ gfx_v10_0.o \
+ imu_v11_0.o \
+ gfx_v11_0.o \
+ gfx_v11_0_3.o \
+ imu_v11_0_3.o \
+ gfx_v12_0.o \
+ imu_v12_0.o
+
+# add async DMA block
+amdgpu-y += \
+ amdgpu_sdma.o \
+ sdma_v2_4.o \
+ sdma_v3_0.o \
+ sdma_v4_0.o \
+ sdma_v4_4.o \
+ sdma_v4_4_2.o \
+ sdma_v5_0.o \
+ sdma_v5_2.o \
+ sdma_v6_0.o \
+ sdma_v7_0.o
+
+# add MES block
+amdgpu-y += \
+ amdgpu_mes.o \
+ mes_v11_0.o \
+ mes_v12_0.o
+
+# add GFX userqueue support
+amdgpu-y += mes_userqueue.o
+
+# add UVD block
+amdgpu-y += \
+ amdgpu_uvd.o \
+ uvd_v5_0.o \
+ uvd_v6_0.o \
+ uvd_v7_0.o
+
+# add VCE block
+amdgpu-y += \
+ amdgpu_vce.o \
+ vce_v3_0.o \
+ vce_v4_0.o
+
+# add VCN and JPEG block
+amdgpu-y += \
+ amdgpu_vcn.o \
+ vcn_sw_ring.o \
+ vcn_v1_0.o \
+ vcn_v2_0.o \
+ vcn_v2_5.o \
+ vcn_v3_0.o \
+ vcn_v4_0.o \
+ vcn_v4_0_3.o \
+ vcn_v4_0_5.o \
+ vcn_v5_0_0.o \
+ vcn_v5_0_1.o \
+ amdgpu_jpeg.o \
+ jpeg_v1_0.o \
+ jpeg_v2_0.o \
+ jpeg_v2_5.o \
+ jpeg_v3_0.o \
+ jpeg_v4_0.o \
+ jpeg_v4_0_3.o \
+ jpeg_v4_0_5.o \
+ jpeg_v5_0_0.o \
+ jpeg_v5_0_1.o
+
+# add VPE block
+amdgpu-y += \
+ amdgpu_vpe.o \
+ vpe_v6_1.o
+
+# add UMSCH block
+amdgpu-y += \
+ amdgpu_umsch_mm.o \
+ umsch_mm_v4_0.o
+
+# add ATHUB block
+amdgpu-y += \
+ athub_v1_0.o \
+ athub_v2_0.o \
+ athub_v2_1.o \
+ athub_v3_0.o \
+ athub_v4_1_0.o
+
+# add SMUIO block
+amdgpu-y += \
+ smuio_v9_0.o \
+ smuio_v11_0.o \
+ smuio_v11_0_6.o \
+ smuio_v13_0.o \
+ smuio_v13_0_3.o \
+ smuio_v13_0_6.o \
+ smuio_v14_0_2.o
+
+# add reset block
+amdgpu-y += \
+ amdgpu_reset.o
+
+# add MCA block
+amdgpu-y += \
+ mca_v3_0.o
+
+# add amdkfd interfaces
+amdgpu-y += amdgpu_amdkfd.o
+
+# add gfx usermode queue
+amdgpu-y += amdgpu_userq.o
+
+ifneq ($(CONFIG_HSA_AMD),)
+AMDKFD_PATH := ../amdkfd
+include $(FULL_AMD_PATH)/amdkfd/Makefile
+amdgpu-y += $(AMDKFD_FILES)
+amdgpu-y += \
+ amdgpu_amdkfd_fence.o \
+ amdgpu_amdkfd_gpuvm.o \
+ amdgpu_amdkfd_gfx_v8.o \
+ amdgpu_amdkfd_gfx_v9.o \
+ amdgpu_amdkfd_arcturus.o \
+ amdgpu_amdkfd_aldebaran.o \
+ amdgpu_amdkfd_gc_9_4_3.o \
+ amdgpu_amdkfd_gfx_v10.o \
+ amdgpu_amdkfd_gfx_v10_3.o \
+ amdgpu_amdkfd_gfx_v11.o \
+ amdgpu_amdkfd_gfx_v12.o
+
+ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
+amdgpu-y += amdgpu_amdkfd_gfx_v7.o
+endif
+
+endif
+
+# add cgs
+amdgpu-y += amdgpu_cgs.o
+
+# GPU scheduler
+amdgpu-y += amdgpu_job.o
+
+# ACP component
+ifneq ($(CONFIG_DRM_AMD_ACP),)
+amdgpu-y += amdgpu_acp.o
+
+AMDACPPATH := ../acp
+include $(FULL_AMD_PATH)/acp/Makefile
+
+amdgpu-y += $(AMD_ACP_FILES)
+endif
+
+amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
+amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
+amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o
+
+include $(FULL_AMD_PATH)/pm/Makefile
+
+amdgpu-y += $(AMD_POWERPLAY_FILES)
+
+ifneq ($(CONFIG_DRM_AMD_DC),)
+
+RELATIVE_AMD_DISPLAY_PATH = ../$(DISPLAY_FOLDER_NAME)
+include $(FULL_AMD_DISPLAY_PATH)/Makefile
+
+amdgpu-y += $(AMD_DISPLAY_FILES)
+
+endif
+
+# add isp block
+ifneq ($(CONFIG_DRM_AMD_ISP),)
+amdgpu-y += \
+ amdgpu_isp.o \
+ isp_v4_1_0.o \
+ isp_v4_1_1.o
+endif
+
+AMD_GPU_RAS_PATH := ../ras
+AMD_GPU_RAS_FULL_PATH := $(FULL_AMD_PATH)/ras
+include $(AMD_GPU_RAS_FULL_PATH)/Makefile
+amdgpu-y += $(AMD_GPU_RAS_FILES)
+
+obj-$(CONFIG_DRM_AMDGPU)+= amdgpu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
new file mode 100644
index 000000000000..a0f0a17e224f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
@@ -0,0 +1,745 @@
+/*
+* Copyright 2006-2007 Advanced Micro Devices, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+* OTHER DEALINGS IN THE SOFTWARE.
+*/
+/* based on stg/asic_reg/drivers/inc/asic_reg/ObjectID.h ver 23 */
+
+#ifndef _OBJECTID_H
+#define _OBJECTID_H
+
+#if defined(_X86_)
+#pragma pack(1)
+#endif
+
+/****************************************************/
+/* Graphics Object Type Definition */
+/****************************************************/
+#define GRAPH_OBJECT_TYPE_NONE 0x0
+#define GRAPH_OBJECT_TYPE_GPU 0x1
+#define GRAPH_OBJECT_TYPE_ENCODER 0x2
+#define GRAPH_OBJECT_TYPE_CONNECTOR 0x3
+#define GRAPH_OBJECT_TYPE_ROUTER 0x4
+/* deleted */
+#define GRAPH_OBJECT_TYPE_DISPLAY_PATH 0x6
+#define GRAPH_OBJECT_TYPE_GENERIC 0x7
+
+/****************************************************/
+/* Encoder Object ID Definition */
+/****************************************************/
+#define ENCODER_OBJECT_ID_NONE 0x00
+
+/* Radeon Class Display Hardware */
+#define ENCODER_OBJECT_ID_INTERNAL_LVDS 0x01
+#define ENCODER_OBJECT_ID_INTERNAL_TMDS1 0x02
+#define ENCODER_OBJECT_ID_INTERNAL_TMDS2 0x03
+#define ENCODER_OBJECT_ID_INTERNAL_DAC1 0x04
+#define ENCODER_OBJECT_ID_INTERNAL_DAC2 0x05 /* TV/CV DAC */
+#define ENCODER_OBJECT_ID_INTERNAL_SDVOA 0x06
+#define ENCODER_OBJECT_ID_INTERNAL_SDVOB 0x07
+
+/* External Third Party Encoders */
+#define ENCODER_OBJECT_ID_SI170B 0x08
+#define ENCODER_OBJECT_ID_CH7303 0x09
+#define ENCODER_OBJECT_ID_CH7301 0x0A
+#define ENCODER_OBJECT_ID_INTERNAL_DVO1 0x0B /* This belongs to Radeon Class Display Hardware */
+#define ENCODER_OBJECT_ID_EXTERNAL_SDVOA 0x0C
+#define ENCODER_OBJECT_ID_EXTERNAL_SDVOB 0x0D
+#define ENCODER_OBJECT_ID_TITFP513 0x0E
+#define ENCODER_OBJECT_ID_INTERNAL_LVTM1 0x0F /* not used for Radeon */
+#define ENCODER_OBJECT_ID_VT1623 0x10
+#define ENCODER_OBJECT_ID_HDMI_SI1930 0x11
+#define ENCODER_OBJECT_ID_HDMI_INTERNAL 0x12
+#define ENCODER_OBJECT_ID_ALMOND 0x22
+#define ENCODER_OBJECT_ID_TRAVIS 0x23
+#define ENCODER_OBJECT_ID_NUTMEG 0x22
+#define ENCODER_OBJECT_ID_HDMI_ANX9805 0x26
+
+/* Kaleidoscope (KLDSCP) Class Display Hardware (internal) */
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1 0x13
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1 0x14
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1 0x15
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2 0x16 /* Shared with CV/TV and CRT */
+#define ENCODER_OBJECT_ID_SI178 0X17 /* External TMDS (dual link, no HDCP.) */
+#define ENCODER_OBJECT_ID_MVPU_FPGA 0x18 /* MVPU FPGA chip */
+#define ENCODER_OBJECT_ID_INTERNAL_DDI 0x19
+#define ENCODER_OBJECT_ID_VT1625 0x1A
+#define ENCODER_OBJECT_ID_HDMI_SI1932 0x1B
+#define ENCODER_OBJECT_ID_DP_AN9801 0x1C
+#define ENCODER_OBJECT_ID_DP_DP501 0x1D
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY 0x1E
+#define ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA 0x1F
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 0x20
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 0x21
+#define ENCODER_OBJECT_ID_INTERNAL_VCE 0x24
+#define ENCODER_OBJECT_ID_INTERNAL_UNIPHY3 0x25
+#define ENCODER_OBJECT_ID_INTERNAL_AMCLK 0x27
+
+#define ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO 0xFF
+
+/****************************************************/
+/* Connector Object ID Definition */
+/****************************************************/
+#define CONNECTOR_OBJECT_ID_NONE 0x00
+#define CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I 0x01
+#define CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I 0x02
+#define CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D 0x03
+#define CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D 0x04
+#define CONNECTOR_OBJECT_ID_VGA 0x05
+#define CONNECTOR_OBJECT_ID_COMPOSITE 0x06
+#define CONNECTOR_OBJECT_ID_SVIDEO 0x07
+#define CONNECTOR_OBJECT_ID_YPbPr 0x08
+#define CONNECTOR_OBJECT_ID_D_CONNECTOR 0x09
+#define CONNECTOR_OBJECT_ID_9PIN_DIN 0x0A /* Supports both CV & TV */
+#define CONNECTOR_OBJECT_ID_SCART 0x0B
+#define CONNECTOR_OBJECT_ID_HDMI_TYPE_A 0x0C
+#define CONNECTOR_OBJECT_ID_HDMI_TYPE_B 0x0D
+#define CONNECTOR_OBJECT_ID_LVDS 0x0E
+#define CONNECTOR_OBJECT_ID_7PIN_DIN 0x0F
+#define CONNECTOR_OBJECT_ID_PCIE_CONNECTOR 0x10
+#define CONNECTOR_OBJECT_ID_CROSSFIRE 0x11
+#define CONNECTOR_OBJECT_ID_HARDCODE_DVI 0x12
+#define CONNECTOR_OBJECT_ID_DISPLAYPORT 0x13
+#define CONNECTOR_OBJECT_ID_eDP 0x14
+#define CONNECTOR_OBJECT_ID_MXM 0x15
+#define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16
+#define CONNECTOR_OBJECT_ID_USBC 0x17
+
+/* deleted */
+
+/****************************************************/
+/* Router Object ID Definition */
+/****************************************************/
+#define ROUTER_OBJECT_ID_NONE 0x00
+#define ROUTER_OBJECT_ID_I2C_EXTENDER_CNTL 0x01
+
+/****************************************************/
+/* Generic Object ID Definition */
+/****************************************************/
+#define GENERIC_OBJECT_ID_NONE 0x00
+#define GENERIC_OBJECT_ID_GLSYNC 0x01
+#define GENERIC_OBJECT_ID_PX2_NON_DRIVABLE 0x02
+#define GENERIC_OBJECT_ID_MXM_OPM 0x03
+#define GENERIC_OBJECT_ID_STEREO_PIN 0x04 //This object could show up from Misc Object table, it follows ATOM_OBJECT format, and contains one ATOM_OBJECT_GPIO_CNTL_RECORD for the stereo pin
+#define GENERIC_OBJECT_ID_BRACKET_LAYOUT 0x05
+
+/****************************************************/
+/* Graphics Object ENUM ID Definition */
+/****************************************************/
+#define GRAPH_OBJECT_ENUM_ID1 0x01
+#define GRAPH_OBJECT_ENUM_ID2 0x02
+#define GRAPH_OBJECT_ENUM_ID3 0x03
+#define GRAPH_OBJECT_ENUM_ID4 0x04
+#define GRAPH_OBJECT_ENUM_ID5 0x05
+#define GRAPH_OBJECT_ENUM_ID6 0x06
+#define GRAPH_OBJECT_ENUM_ID7 0x07
+
+/****************************************************/
+/* Graphics Object ID Bit definition */
+/****************************************************/
+#define OBJECT_ID_MASK 0x00FF
+#define ENUM_ID_MASK 0x0700
+#define RESERVED1_ID_MASK 0x0800
+#define OBJECT_TYPE_MASK 0x7000
+#define RESERVED2_ID_MASK 0x8000
+
+#define OBJECT_ID_SHIFT 0x00
+#define ENUM_ID_SHIFT 0x08
+#define OBJECT_TYPE_SHIFT 0x0C
+
+
+/****************************************************/
+/* Graphics Object family definition */
+/****************************************************/
+#define CONSTRUCTOBJECTFAMILYID(GRAPHICS_OBJECT_TYPE, GRAPHICS_OBJECT_ID) (GRAPHICS_OBJECT_TYPE << OBJECT_TYPE_SHIFT | \
+ GRAPHICS_OBJECT_ID << OBJECT_ID_SHIFT)
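+
+/* Worked example (informational): the legacy literal 0x310C quoted in the
+ * comment block below decomposes with the masks above as
+ *   (0x310C & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT == 0x3  (CONNECTOR)
+ *   (0x310C & ENUM_ID_MASK)     >> ENUM_ID_SHIFT     == 0x1  (ENUM_ID1)
+ *   (0x310C & OBJECT_ID_MASK)   >> OBJECT_ID_SHIFT   == 0x0C (HDMI_TYPE_A)
+ * i.e. exactly what the CONNECTOR_HDMI_TYPE_A_ENUM_ID1 definition composes.
+ */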
+/****************************************************/
+/* GPU Object ID definition - Shared with BIOS */
+/****************************************************/
+#define GPU_ENUM_ID1 ( GRAPH_OBJECT_TYPE_GPU << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT)
+
+/****************************************************/
+/* Encoder Object ID definition - Shared with BIOS */
+/****************************************************/
+/*
+#define ENCODER_INTERNAL_LVDS_ENUM_ID1 0x2101
+#define ENCODER_INTERNAL_TMDS1_ENUM_ID1 0x2102
+#define ENCODER_INTERNAL_TMDS2_ENUM_ID1 0x2103
+#define ENCODER_INTERNAL_DAC1_ENUM_ID1 0x2104
+#define ENCODER_INTERNAL_DAC2_ENUM_ID1 0x2105
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID1 0x2106
+#define ENCODER_INTERNAL_SDVOB_ENUM_ID1 0x2107
+#define ENCODER_SIL170B_ENUM_ID1 0x2108
+#define ENCODER_CH7303_ENUM_ID1 0x2109
+#define ENCODER_CH7301_ENUM_ID1 0x210A
+#define ENCODER_INTERNAL_DVO1_ENUM_ID1 0x210B
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID1 0x210C
+#define ENCODER_EXTERNAL_SDVOB_ENUM_ID1 0x210D
+#define ENCODER_TITFP513_ENUM_ID1 0x210E
+#define ENCODER_INTERNAL_LVTM1_ENUM_ID1 0x210F
+#define ENCODER_VT1623_ENUM_ID1 0x2110
+#define ENCODER_HDMI_SI1930_ENUM_ID1 0x2111
+#define ENCODER_HDMI_INTERNAL_ENUM_ID1 0x2112
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID1 0x2113
+#define ENCODER_INTERNAL_KLDSCP_DVO1_ENUM_ID1 0x2114
+#define ENCODER_INTERNAL_KLDSCP_DAC1_ENUM_ID1 0x2115
+#define ENCODER_INTERNAL_KLDSCP_DAC2_ENUM_ID1 0x2116
+#define ENCODER_SI178_ENUM_ID1 0x2117
+#define ENCODER_MVPU_FPGA_ENUM_ID1 0x2118
+#define ENCODER_INTERNAL_DDI_ENUM_ID1 0x2119
+#define ENCODER_VT1625_ENUM_ID1 0x211A
+#define ENCODER_HDMI_SI1932_ENUM_ID1 0x211B
+#define ENCODER_ENCODER_DP_AN9801_ENUM_ID1 0x211C
+#define ENCODER_DP_DP501_ENUM_ID1 0x211D
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID1 0x211E
+*/
+#define ENCODER_INTERNAL_LVDS_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_LVDS << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_TMDS1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_TMDS1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_TMDS2_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_TMDS2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DAC1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_DAC1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DAC2_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_DAC2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOA_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_SDVOB_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_SDVOB << OBJECT_ID_SHIFT)
+
+#define ENCODER_SIL170B_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_SI170B << OBJECT_ID_SHIFT)
+
+#define ENCODER_CH7303_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_CH7303 << OBJECT_ID_SHIFT)
+
+#define ENCODER_CH7301_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_CH7301 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DVO1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_DVO1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_EXTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+#define ENCODER_EXTERNAL_SDVOA_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_EXTERNAL_SDVOA << OBJECT_ID_SHIFT)
+
+
+#define ENCODER_EXTERNAL_SDVOB_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_EXTERNAL_SDVOB << OBJECT_ID_SHIFT)
+
+
+#define ENCODER_TITFP513_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_TITFP513 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_LVTM1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_LVTM1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_VT1623_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_VT1623 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_SI1930_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_HDMI_SI1930 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_INTERNAL_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_HDMI_INTERNAL << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1 << OBJECT_ID_SHIFT)
+
+
+#define ENCODER_INTERNAL_KLDSCP_TMDS1_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1 << OBJECT_ID_SHIFT)
+
+
+#define ENCODER_INTERNAL_KLDSCP_DVO1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_DAC1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_DAC2_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2 << OBJECT_ID_SHIFT) // Shared with CV/TV and CRT
+
+#define ENCODER_SI178_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_SI178 << OBJECT_ID_SHIFT)
+
+#define ENCODER_MVPU_FPGA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_MVPU_FPGA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_DDI_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_DDI << OBJECT_ID_SHIFT)
+
+#define ENCODER_VT1625_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_VT1625 << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_SI1932_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_HDMI_SI1932 << OBJECT_ID_SHIFT)
+
+#define ENCODER_DP_DP501_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_DP_DP501 << OBJECT_ID_SHIFT)
+
+#define ENCODER_DP_AN9801_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_DP_AN9801 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_KLDSCP_LVTMA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY1_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY1_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY1 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY2_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY2_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY2 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY3_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY3 << OBJECT_ID_SHIFT)
+
+#define ENCODER_INTERNAL_UNIPHY3_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_UNIPHY3 << OBJECT_ID_SHIFT)
+
+#define ENCODER_GENERAL_EXTERNAL_DVO_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_GENERAL_EXTERNAL_DVO << OBJECT_ID_SHIFT)
+
+#define ENCODER_ALMOND_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_ALMOND << OBJECT_ID_SHIFT)
+
+#define ENCODER_ALMOND_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_ALMOND << OBJECT_ID_SHIFT)
+
+#define ENCODER_TRAVIS_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_TRAVIS << OBJECT_ID_SHIFT)
+
+#define ENCODER_TRAVIS_ENUM_ID2 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_TRAVIS << OBJECT_ID_SHIFT)
+
+#define ENCODER_NUTMEG_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_NUTMEG << OBJECT_ID_SHIFT)
+
+#define ENCODER_VCE_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_INTERNAL_VCE << OBJECT_ID_SHIFT)
+
+#define ENCODER_HDMI_ANX9805_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ENCODER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ENCODER_OBJECT_ID_HDMI_ANX9805 << OBJECT_ID_SHIFT)
+
+/****************************************************/
+/* Connector Object ID definition - Shared with BIOS */
+/****************************************************/
+/*
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID1 0x3101
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID1 0x3102
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID1 0x3103
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID1 0x3104
+#define CONNECTOR_VGA_ENUM_ID1 0x3105
+#define CONNECTOR_COMPOSITE_ENUM_ID1 0x3106
+#define CONNECTOR_SVIDEO_ENUM_ID1 0x3107
+#define CONNECTOR_YPbPr_ENUM_ID1 0x3108
+#define CONNECTOR_D_CONNECTORE_ENUM_ID1 0x3109
+#define CONNECTOR_9PIN_DIN_ENUM_ID1 0x310A
+#define CONNECTOR_SCART_ENUM_ID1 0x310B
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID1 0x310C
+#define CONNECTOR_HDMI_TYPE_B_ENUM_ID1 0x310D
+#define CONNECTOR_LVDS_ENUM_ID1 0x310E
+#define CONNECTOR_7PIN_DIN_ENUM_ID1 0x310F
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID1 0x3110
+*/
+#define CONNECTOR_LVDS_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_LVDS << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_LVDS_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_LVDS << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_eDP_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_eDP << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_eDP_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_eDP << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_I_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_I_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID5 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID5 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SINGLE_LINK_DVI_D_ENUM_ID6 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID6 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DUAL_LINK_DVI_D_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_VGA_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_VGA << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_VGA_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_VGA << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_COMPOSITE_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_COMPOSITE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_COMPOSITE_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_COMPOSITE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SVIDEO_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SVIDEO << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SVIDEO_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SVIDEO << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_YPbPr_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_YPbPr << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_YPbPr_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_YPbPr << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_D_CONNECTOR_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_D_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_D_CONNECTOR_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_D_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_9PIN_DIN_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_9PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_9PIN_DIN_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_9PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SCART_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SCART << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_SCART_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_SCART << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID5 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID5 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_A_ENUM_ID6 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID6 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_A << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_B_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_B << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HDMI_TYPE_B_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HDMI_TYPE_B << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_7PIN_DIN_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_7PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_7PIN_DIN_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_7PIN_DIN << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_PCIE_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_PCIE_CONNECTOR_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_PCIE_CONNECTOR << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_CROSSFIRE_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_CROSSFIRE << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_CROSSFIRE_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_CROSSFIRE << OBJECT_ID_SHIFT)
+
+
+#define CONNECTOR_HARDCODE_DVI_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HARDCODE_DVI << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_HARDCODE_DVI_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_HARDCODE_DVI << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID5 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID5 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_DISPLAYPORT_ENUM_ID6 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID6 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_DISPLAYPORT << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_MXM_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DP_A
+
+#define CONNECTOR_MXM_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DP_B
+
+#define CONNECTOR_MXM_ENUM_ID3 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID3 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DP_C
+
+#define CONNECTOR_MXM_ENUM_ID4 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID4 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DP_D
+
+#define CONNECTOR_MXM_ENUM_ID5 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID5 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_LVDS_TXxx
+
+#define CONNECTOR_MXM_ENUM_ID6 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID6 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_LVDS_UXxx
+
+#define CONNECTOR_MXM_ENUM_ID7 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID7 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_MXM << OBJECT_ID_SHIFT) //Mapping to MXM_DAC
+
+#define CONNECTOR_LVDS_eDP_ENUM_ID1 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_LVDS_eDP << OBJECT_ID_SHIFT)
+
+#define CONNECTOR_LVDS_eDP_ENUM_ID2 ( GRAPH_OBJECT_TYPE_CONNECTOR << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ CONNECTOR_OBJECT_ID_LVDS_eDP << OBJECT_ID_SHIFT)
+
+/****************************************************/
+/* Router Object ID definition - Shared with BIOS */
+/****************************************************/
+#define ROUTER_I2C_EXTENDER_CNTL_ENUM_ID1 ( GRAPH_OBJECT_TYPE_ROUTER << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ ROUTER_OBJECT_ID_I2C_EXTENDER_CNTL << OBJECT_ID_SHIFT)
+
+/* deleted */
+
+/****************************************************/
+/* Generic Object ID definition - Shared with BIOS */
+/****************************************************/
+#define GENERICOBJECT_GLSYNC_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_GLSYNC << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_PX2_NON_DRIVABLE_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_PX2_NON_DRIVABLE<< OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_PX2_NON_DRIVABLE_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_PX2_NON_DRIVABLE<< OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_MXM_OPM_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_MXM_OPM << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_STEREO_PIN_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_STEREO_PIN << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
+/****************************************************/
+/* Object Cap definition - Shared with BIOS */
+/****************************************************/
+#define GRAPHICS_OBJECT_CAP_I2C 0x00000001L
+#define GRAPHICS_OBJECT_CAP_TABLE_ID 0x00000002L
+
+
+#define GRAPHICS_OBJECT_I2CCOMMAND_TABLE_ID 0x01
+#define GRAPHICS_OBJECT_HOTPLUGDETECTIONINTERUPT_TABLE_ID 0x02
+#define GRAPHICS_OBJECT_ENCODER_OUTPUT_PROTECTION_TABLE_ID 0x03
+
+#if defined(_X86_)
+#pragma pack()
+#endif
+
+#endif /* _OBJECTID_H */
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
new file mode 100644
index 000000000000..daa7b23bc775
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "aldebaran.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_xgmi.h"
+
+static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ adev->gmc.xgmi.connected_to_cpu))
+ return true;
+
+ return false;
+}
+
+static struct amdgpu_reset_handler *
+aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ if (reset_context->method == AMD_RESET_METHOD_NONE) {
+ if (aldebaran_is_mode2_default(reset_ctl))
+ reset_context->method = AMD_RESET_METHOD_MODE2;
+ else
+ reset_context->method = amdgpu_asic_reset_method(adev);
+ }
+
+ if (reset_context->method != AMD_RESET_METHOD_NONE) {
+ dev_dbg(adev->dev, "Getting reset handler for method %d\n",
+ reset_context->method);
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_context->method)
+ return handler;
+ }
+ }
+
+ dev_dbg(adev->dev, "Reset handler not found!\n");
+
+ return NULL;
+}
+
+static inline uint32_t aldebaran_get_ip_block_mask(struct amdgpu_device *adev)
+{
+ uint32_t ip_block_mask = BIT(AMD_IP_BLOCK_TYPE_GFX) |
+ BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
+ if (adev->aid_mask)
+ ip_block_mask |= BIT(AMD_IP_BLOCK_TYPE_IH);
+
+ return ip_block_mask;
+}
+
+static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
+{
+ uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
+ uint32_t ip_block;
+ int r, i;
+
+ /* Skip suspend of SDMA for IP versions >= 4.4.2; they are multi-AID */
+ if (adev->aid_mask)
+ ip_block_mask &= ~BIT(AMD_IP_BLOCK_TYPE_SDMA);
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ ip_block = BIT(adev->ip_blocks[i].version->type);
+ if (!(ip_block_mask & ip_block))
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int
+aldebaran_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+
+ dev_dbg(adev->dev, "Aldebaran prepare hw context\n");
+ /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
+ if (!amdgpu_sriov_vf(adev))
+ r = aldebaran_mode2_suspend_ip(adev);
+
+ return r;
+}
+
+static void aldebaran_async_reset(struct work_struct *work)
+{
+ struct amdgpu_reset_handler *handler;
+ struct amdgpu_reset_control *reset_ctl =
+ container_of(work, struct amdgpu_reset_control, reset_work);
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ int i;
+
+ for_each_handler(i, handler, reset_ctl) {
+ if (handler->reset_method == reset_ctl->active_reset) {
+ dev_dbg(adev->dev, "Resetting device\n");
+ handler->do_reset(adev);
+ break;
+ }
+ }
+}
+
+static int aldebaran_mode2_reset(struct amdgpu_device *adev)
+{
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ adev->asic_reset_res = amdgpu_dpm_mode2_reset(adev);
+ return adev->asic_reset_res;
+}
+
+static int
+aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r = 0;
+
+ dev_dbg(adev->dev, "aldebaran perform hw reset\n");
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ reset_context->hive == NULL) {
+ /* Wrong context, return error */
+ return -EINVAL;
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
+ }
+ /*
+ * Mode2 reset doesn't need any sync between nodes in an XGMI hive; instead,
+ * launch the resets together so that they can complete asynchronously on
+ * multiple nodes.
+ */
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!queue_work(system_unbound_wq,
+ &tmp_adev->reset_cntl->reset_work))
+ r = -EALREADY;
+ } else
+ r = aldebaran_mode2_reset(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ break;
+ }
+ }
+
+ /* For XGMI, wait for all resets to complete before proceeding */
+ if (!r) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->reset_cntl->reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+ }
+
+ return r;
+}
+
+static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
+{
+ struct amdgpu_firmware_info *ucode_list[AMDGPU_UCODE_ID_MAXIMUM];
+ uint32_t ip_block_mask = aldebaran_get_ip_block_mask(adev);
+ struct amdgpu_firmware_info *ucode;
+ struct amdgpu_ip_block *cmn_block;
+ struct amdgpu_ip_block *ih_block;
+ int ucode_count = 0;
+ int i, r;
+
+ dev_dbg(adev->dev, "Reloading ucodes after reset\n");
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+ if (!ucode->fw)
+ continue;
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ case AMDGPU_UCODE_ID_SDMA1:
+ case AMDGPU_UCODE_ID_SDMA2:
+ case AMDGPU_UCODE_ID_SDMA3:
+ case AMDGPU_UCODE_ID_SDMA4:
+ case AMDGPU_UCODE_ID_SDMA5:
+ case AMDGPU_UCODE_ID_SDMA6:
+ case AMDGPU_UCODE_ID_SDMA7:
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ case AMDGPU_UCODE_ID_RLC_G:
+ ucode_list[ucode_count++] = ucode;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Reinit NBIF block */
+ cmn_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_COMMON);
+ if (unlikely(!cmn_block)) {
+ dev_err(adev->dev, "Failed to get BIF handle\n");
+ return -EINVAL;
+ }
+ r = amdgpu_ip_block_resume(cmn_block);
+ if (r)
+ return r;
+
+ if (ip_block_mask & BIT(AMD_IP_BLOCK_TYPE_IH)) {
+ ih_block = amdgpu_device_ip_get_ip_block(adev,
+ AMD_IP_BLOCK_TYPE_IH);
+ if (unlikely(!ih_block)) {
+ dev_err(adev->dev, "Failed to get IH handle\n");
+ return -EINVAL;
+ }
+ r = amdgpu_ip_block_resume(ih_block);
+ if (r)
+ return r;
+ }
+
+ /* Reinit GFXHUB */
+ adev->gfxhub.funcs->init(adev);
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r) {
+ dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
+ return r;
+ }
+
+ /* Reload GFX firmware */
+ r = psp_load_fw_list(&adev->psp, ucode_list, ucode_count);
+ if (r) {
+ dev_err(adev->dev, "GFX ucode load failed after reset\n");
+ return r;
+ }
+
+ /* Resume RLC, FW needs RLC alive to complete reset process */
+ adev->gfx.rlc.funcs->resume(adev);
+
+ /* Wait for FW reset event complete */
+ r = amdgpu_dpm_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to get response from firmware after reset\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!(adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA ||
+ adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_COMMON))
+ continue;
+
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(
+ &adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d after reset\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ return r;
+}
+
+static int
+aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_ras *con;
+ int r;
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
+ if (amdgpu_ip_version(reset_context->reset_req_dev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 2) &&
+ reset_context->hive == NULL) {
+ /* Wrong context, return error */
+ return -EINVAL;
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+ dev_info(tmp_adev->dev,
+ "GPU reset succeeded, trying to resume\n");
+ /*TBD: Ideally should clear only GFX, SDMA blocks*/
+ amdgpu_ras_clear_err_state(tmp_adev);
+ r = aldebaran_mode2_restore_ip(tmp_adev);
+ if (r)
+ goto end;
+
+ /*
+ * Add this ASIC as tracked, as the reset already
+ * completed successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ /* Resume RAS, ecc_irq */
+ con = amdgpu_ras_get_context(tmp_adev);
+ if (!amdgpu_sriov_vf(tmp_adev) && con) {
+ if (tmp_adev->sdma.ras &&
+ tmp_adev->sdma.ras->ras_block.ras_late_init) {
+ r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->sdma.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+
+ if (tmp_adev->gfx.ras &&
+ tmp_adev->gfx.ras->ras_block.ras_late_init) {
+ r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
+ &tmp_adev->gfx.ras->ras_block.ras_comm);
+ if (r) {
+ dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
+ goto end;
+ }
+ }
+ }
+
+ amdgpu_ras_resume(tmp_adev);
+
+ /* Update PSP FW topology after reset */
+ if (reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(reset_context->hive,
+ tmp_adev);
+
+ if (!r) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ tmp_adev->asic_reset_res = r;
+ goto end;
+ }
+ }
+ }
+
+end:
+ return r;
+}
+
+static struct amdgpu_reset_handler aldebaran_mode2_handler = {
+ .reset_method = AMD_RESET_METHOD_MODE2,
+ .prepare_env = NULL,
+ .prepare_hwcontext = aldebaran_mode2_prepare_hwcontext,
+ .perform_reset = aldebaran_mode2_perform_reset,
+ .restore_hwcontext = aldebaran_mode2_restore_hwcontext,
+ .restore_env = NULL,
+ .do_reset = aldebaran_mode2_reset,
+};
+
+static struct amdgpu_reset_handler
+ *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
+ &aldebaran_mode2_handler,
+ &xgmi_reset_on_init_handler,
+ };
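+
+/*
+ * Rough usage sketch for the handler table above (helper names are assumed
+ * from amdgpu_reset.h and their exact signatures may differ):
+ *
+ *	struct amdgpu_reset_context ctx = { .method = AMD_RESET_METHOD_NONE };
+ *
+ *	// get_reset_handler() resolves NONE to MODE2 on aldebaran when the
+ *	// GPU is XGMI-connected to the CPU (see aldebaran_is_mode2_default())
+ *	amdgpu_reset_prepare_hwcontext(adev, &ctx);  // -> prepare_hwcontext()
+ *	amdgpu_reset_perform_reset(adev, &ctx);      // -> perform_reset(),
+ *	                                             //    then restore_hwcontext()
+ */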
+
+int aldebaran_reset_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_reset_control *reset_ctl;
+
+ reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
+ if (!reset_ctl)
+ return -ENOMEM;
+
+ reset_ctl->handle = adev;
+ reset_ctl->async_reset = aldebaran_async_reset;
+ reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
+ reset_ctl->get_reset_handler = aldebaran_get_reset_handler;
+
+ INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
+ /* Only mode2 is handled through reset control now */
+ reset_ctl->reset_handlers = &aldebaran_rst_handlers;
+
+ adev->reset_cntl = reset_ctl;
+
+ return 0;
+}
+
+int aldebaran_reset_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->reset_cntl);
+ adev->reset_cntl = NULL;
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.h b/drivers/gpu/drm/amd/amdgpu/aldebaran.h
new file mode 100644
index 000000000000..a07db5454d49
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ALDEBARAN_H__
+#define __ALDEBARAN_H__
+
+#include "amdgpu.h"
+
+int aldebaran_reset_init(struct amdgpu_device *adev);
+int aldebaran_reset_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c
new file mode 100644
index 000000000000..28e6c9ab8767
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran_reg_init.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "aldebaran_ip_offset.h"
+
+int aldebaran_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks; only initialize the blocks needed by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i]));
+ adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
+ }
+ return 0;
+}
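+
+/*
+ * Sketch of how the table filled in above is consumed elsewhere in the
+ * driver (assuming the SOC15_REG_OFFSET() helper from soc15_common.h, which
+ * indexes reg_offset[] by HWIP, instance and register segment; the register
+ * name below is a placeholder):
+ *
+ *	// expands to adev->reg_offset[GC_HWIP][0][<segment>] + <offset>
+ *	u32 val = RREG32(SOC15_REG_OFFSET(GC, 0, regSOME_GC_REGISTER));
+ */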
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
new file mode 100644
index 000000000000..9f9774f58ce1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -0,0 +1,1808 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#ifndef __AMDGPU_H__
+#define __AMDGPU_H__
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) "amdgpu: " fmt
+
+#ifdef dev_fmt
+#undef dev_fmt
+#endif
+
+#define dev_fmt(fmt) "amdgpu: " fmt
+
+#include "amdgpu_ctx.h"
+
+#include <linux/atomic.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
+#include <linux/hashtable.h>
+#include <linux/dma-fence.h>
+#include <linux/pci.h>
+
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_placement.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
+
+#include <kgd_kfd_interface.h>
+#include "dm_pp_interface.h"
+#include "kgd_pp_interface.h"
+
+#include "amd_shared.h"
+#include "amdgpu_utils.h"
+#include "amdgpu_mode.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_irq.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_ttm.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_gds.h"
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_dpm.h"
+#include "amdgpu_acp.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_vpe.h"
+#include "amdgpu_umsch_mm.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_sdma.h"
+#include "amdgpu_lsdma.h"
+#include "amdgpu_nbio.h"
+#include "amdgpu_hdp.h"
+#include "amdgpu_dm.h"
+#include "amdgpu_virt.h"
+#include "amdgpu_csa.h"
+#include "amdgpu_mes_ctx.h"
+#include "amdgpu_gart.h"
+#include "amdgpu_debugfs.h"
+#include "amdgpu_job.h"
+#include "amdgpu_bo_list.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_doorbell.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_discovery.h"
+#include "amdgpu_mes.h"
+#include "amdgpu_umc.h"
+#include "amdgpu_mmhub.h"
+#include "amdgpu_gfxhub.h"
+#include "amdgpu_df.h"
+#include "amdgpu_smuio.h"
+#include "amdgpu_fdinfo.h"
+#include "amdgpu_mca.h"
+#include "amdgpu_aca.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_cper.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_seq64.h"
+#include "amdgpu_reg_state.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_eviction_fence.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
+
+#define MAX_GPU_INSTANCE 64
+
+#define GFX_SLICE_PERIOD_MS 250
+
+struct amdgpu_gpu_instance {
+ struct amdgpu_device *adev;
+ int mgpu_fan_enabled;
+};
+
+struct amdgpu_mgpu_info {
+ struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE];
+ struct mutex mutex;
+ uint32_t num_gpu;
+ uint32_t num_dgpu;
+ uint32_t num_apu;
+};
+
+enum amdgpu_ss {
+ AMDGPU_SS_DRV_LOAD,
+ AMDGPU_SS_DEV_D0,
+ AMDGPU_SS_DEV_D3,
+ AMDGPU_SS_DRV_UNLOAD
+};
+
+struct amdgpu_hwip_reg_entry {
+ u32 hwip;
+ u32 inst;
+ u32 seg;
+ u32 reg_offset;
+ const char *reg_name;
+};
+
+struct amdgpu_watchdog_timer {
+ bool timeout_fatal_disable;
+ uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
+};
+
+#define AMDGPU_MAX_TIMEOUT_PARAM_LENGTH 256
+
+/*
+ * Module parameters.
+ */
+extern int amdgpu_modeset;
+extern unsigned int amdgpu_vram_limit;
+extern int amdgpu_vis_vram_limit;
+extern int amdgpu_gart_size;
+extern int amdgpu_gtt_size;
+extern int amdgpu_moverate;
+extern int amdgpu_audio;
+extern int amdgpu_disp_priority;
+extern int amdgpu_hw_i2c;
+extern int amdgpu_pcie_gen2;
+extern int amdgpu_msi;
+extern char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
+extern int amdgpu_dpm;
+extern int amdgpu_fw_load_type;
+extern int amdgpu_aspm;
+extern int amdgpu_runtime_pm;
+extern uint amdgpu_ip_block_mask;
+extern int amdgpu_bapm;
+extern int amdgpu_deep_color;
+extern int amdgpu_vm_size;
+extern int amdgpu_vm_block_size;
+extern int amdgpu_vm_fragment_size;
+extern int amdgpu_vm_fault_stop;
+extern int amdgpu_vm_debug;
+extern int amdgpu_vm_update_mode;
+extern int amdgpu_exp_hw_support;
+extern int amdgpu_dc;
+extern int amdgpu_sched_jobs;
+extern int amdgpu_sched_hw_submission;
+extern uint amdgpu_pcie_gen_cap;
+extern uint amdgpu_pcie_lane_cap;
+extern u64 amdgpu_cg_mask;
+extern uint amdgpu_pg_mask;
+extern uint amdgpu_sdma_phase_quantum;
+extern char *amdgpu_disable_cu;
+extern char *amdgpu_virtual_display;
+extern uint amdgpu_pp_feature_mask;
+extern uint amdgpu_force_long_training;
+extern int amdgpu_lbpw;
+extern int amdgpu_compute_multipipe;
+extern int amdgpu_gpu_recovery;
+extern int amdgpu_emu_mode;
+extern uint amdgpu_smu_memory_pool_size;
+extern int amdgpu_smu_pptable_id;
+extern uint amdgpu_dc_feature_mask;
+extern uint amdgpu_freesync_vid_mode;
+extern uint amdgpu_dc_debug_mask;
+extern uint amdgpu_dc_visual_confirm;
+extern int amdgpu_dm_abm_level;
+extern int amdgpu_backlight;
+extern int amdgpu_damage_clips;
+extern struct amdgpu_mgpu_info mgpu_info;
+extern int amdgpu_ras_enable;
+extern uint amdgpu_ras_mask;
+extern int amdgpu_bad_page_threshold;
+extern bool amdgpu_ignore_bad_page_threshold;
+extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
+extern int amdgpu_async_gfx_ring;
+extern int amdgpu_mcbp;
+extern int amdgpu_discovery;
+extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
+extern int amdgpu_mes_kiq;
+extern int amdgpu_uni_mes;
+extern int amdgpu_noretry;
+extern int amdgpu_force_asic_type;
+extern int amdgpu_smartshift_bias;
+extern int amdgpu_use_xgmi_p2p;
+extern int amdgpu_mtype_local;
+extern int amdgpu_enforce_isolation;
+#ifdef CONFIG_HSA_AMD
+extern int sched_policy;
+extern bool debug_evictions;
+extern bool no_system_mem_limit;
+extern int halt_if_hws_hang;
+extern uint amdgpu_svm_default_granularity;
+#else
+static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
+static const bool __maybe_unused debug_evictions; /* = false */
+static const bool __maybe_unused no_system_mem_limit;
+static const int __maybe_unused halt_if_hws_hang;
+#endif
+#ifdef CONFIG_HSA_AMD_P2P
+extern bool pcie_p2p;
+#endif
+
+extern int amdgpu_tmz;
+extern int amdgpu_reset_method;
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+extern int amdgpu_si_support;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+extern int amdgpu_cik_support;
+#endif
+extern int amdgpu_num_kcq;
+
+#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
+#define AMDGPU_UMSCHFW_LOG_SIZE (32 * 1024)
+extern int amdgpu_vcnfw_log;
+extern int amdgpu_sg_display;
+extern int amdgpu_umsch_mm;
+extern int amdgpu_seamless;
+extern int amdgpu_umsch_mm_fwlog;
+
+extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
+extern int amdgpu_rebar;
+
+extern int amdgpu_wbrf;
+extern int amdgpu_user_queue;
+
+#define AMDGPU_VM_MAX_NUM_CTX 4096
+#define AMDGPU_SG_THRESHOLD (256*1024*1024)
+#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
+#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
+#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
+#define AMDGPU_DEBUGFS_MAX_COMPONENTS 32
+#define AMDGPUFB_CONN_LIMIT 4
+#define AMDGPU_BIOS_NUM_SCRATCH 16
+
+#define AMDGPU_VBIOS_VGA_ALLOCATION (9 * 1024 * 1024) /* reserve 8 MB for the VGA emulator and 1 MB for the FB */
+
+/* hard reset data */
+#define AMDGPU_ASIC_RESET_DATA 0x39d5e86b
+
+/* reset flags */
+#define AMDGPU_RESET_GFX (1 << 0)
+#define AMDGPU_RESET_COMPUTE (1 << 1)
+#define AMDGPU_RESET_DMA (1 << 2)
+#define AMDGPU_RESET_CP (1 << 3)
+#define AMDGPU_RESET_GRBM (1 << 4)
+#define AMDGPU_RESET_DMA1 (1 << 5)
+#define AMDGPU_RESET_RLC (1 << 6)
+#define AMDGPU_RESET_SEM (1 << 7)
+#define AMDGPU_RESET_IH (1 << 8)
+#define AMDGPU_RESET_VMC (1 << 9)
+#define AMDGPU_RESET_MC (1 << 10)
+#define AMDGPU_RESET_DISPLAY (1 << 11)
+#define AMDGPU_RESET_UVD (1 << 12)
+#define AMDGPU_RESET_VCE (1 << 13)
+#define AMDGPU_RESET_VCE1 (1 << 14)
+
+/* reset mask */
+#define AMDGPU_RESET_TYPE_FULL (1 << 0) /* full adapter reset, mode1/mode2/BACO/etc. */
+#define AMDGPU_RESET_TYPE_SOFT_RESET (1 << 1) /* IP level soft reset */
+#define AMDGPU_RESET_TYPE_PER_QUEUE (1 << 2) /* per queue */
+#define AMDGPU_RESET_TYPE_PER_PIPE (1 << 3) /* per pipe */
+
+/* max cursor sizes (in pixels) */
+#define CIK_CURSOR_WIDTH 128
+#define CIK_CURSOR_HEIGHT 128
+
+/* smart shift bias level limits */
+#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
+#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
+
+/* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */
+#define AMDGPU_SWCTF_EXTRA_DELAY 50
+
+struct amdgpu_xcp_mgr;
+struct amdgpu_device;
+struct amdgpu_irq_src;
+struct amdgpu_fpriv;
+struct amdgpu_bo_va_mapping;
+struct kfd_vm_fault_info;
+struct amdgpu_hive_info;
+struct amdgpu_reset_context;
+struct amdgpu_reset_control;
+
+enum amdgpu_cp_irq {
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP,
+ AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP,
+
+ AMDGPU_CP_IRQ_LAST
+};
+
+enum amdgpu_thermal_irq {
+ AMDGPU_THERMAL_IRQ_LOW_TO_HIGH = 0,
+ AMDGPU_THERMAL_IRQ_HIGH_TO_LOW,
+
+ AMDGPU_THERMAL_IRQ_LAST
+};
+
+enum amdgpu_kiq_irq {
+ AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
+ AMDGPU_CP_KIQ_IRQ_LAST
+};
+#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 1000
+
+int amdgpu_device_ip_set_clockgating_state(void *dev,
+ enum amd_ip_block_type block_type,
+ enum amd_clockgating_state state);
+int amdgpu_device_ip_set_powergating_state(void *dev,
+ enum amd_ip_block_type block_type,
+ enum amd_powergating_state state);
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags);
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type);
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
+
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
+
+#define AMDGPU_MAX_IP_NUM AMD_IP_BLOCK_TYPE_NUM
+
+struct amdgpu_ip_block_status {
+ bool valid;
+ bool sw;
+ bool hw;
+ bool late_initialized;
+ bool hang;
+};
+
+struct amdgpu_ip_block_version {
+ const enum amd_ip_block_type type;
+ const u32 major;
+ const u32 minor;
+ const u32 rev;
+ const struct amd_ip_funcs *funcs;
+};
+
+struct amdgpu_ip_block {
+ struct amdgpu_ip_block_status status;
+ const struct amdgpu_ip_block_version *version;
+ struct amdgpu_device *adev;
+};
+
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+ enum amd_ip_block_type type,
+ u32 major, u32 minor);
+
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+ enum amd_ip_block_type type);
+
+int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
+ const struct amdgpu_ip_block_version *ip_block_version);
+
+/*
+ * BIOS.
+ */
+bool amdgpu_get_bios(struct amdgpu_device *adev);
+bool amdgpu_read_bios(struct amdgpu_device *adev);
+bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes);
+void amdgpu_bios_release(struct amdgpu_device *adev);
+/*
+ * Clocks
+ */
+
+#define AMDGPU_MAX_PPLL 3
+
+struct amdgpu_clock {
+ struct amdgpu_pll ppll[AMDGPU_MAX_PPLL];
+ struct amdgpu_pll spll;
+ struct amdgpu_pll mpll;
+ /* 10 kHz units */
+ uint32_t default_mclk;
+ uint32_t default_sclk;
+ uint32_t default_dispclk;
+ uint32_t dp_extclk;
+ uint32_t max_pixel_clock;
+};
+
+/* Sub-allocation manager; it has to be protected by another lock.
+ * By design this is a helper for other parts of the driver,
+ * like the indirect buffer or semaphore, which both have their
+ * own locking.
+ *
+ * The principle is simple: we keep a list of sub-allocations in
+ * offset order (the first entry has offset == 0, the last entry
+ * has the highest offset).
+ *
+ * When allocating a new object we first check if there is room at
+ * the end, i.e. total_size - (last_object_offset + last_object_size) >=
+ * alloc_size. If so, we allocate the new object there.
+ *
+ * When there is not enough room at the end, we start waiting on
+ * each sub-object until we reach object_offset + object_size >=
+ * alloc_size; that object then becomes the sub-object we return.
+ *
+ * Alignment can't be bigger than the page size.
+ *
+ * Holes are not considered for allocation, to keep things simple.
+ * The assumption is that there won't be holes (all objects use the
+ * same alignment).
+ */
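+
+/* A short worked example of the scheme above (numbers are illustrative):
+ * with total_size = 1024 and live sub-allocations at [0, 256) and [256, 640),
+ * a new 256-byte request fits at the end because 1024 - 640 >= 256, so it is
+ * placed at offset 640. A following 512-byte request does not fit at the end,
+ * so the allocator waits on the oldest sub-objects, starting at offset 0,
+ * until object_offset + object_size >= 512 (here both objects), and then
+ * returns the freed space at the start of the buffer.
+ */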
+
+struct amdgpu_sa_manager {
+ struct drm_suballoc_manager base;
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ void *cpu_ptr;
+};
+
+/*
+ * IRQS.
+ */
+
+struct amdgpu_flip_work {
+ struct delayed_work flip_work;
+ struct work_struct unpin_work;
+ struct amdgpu_device *adev;
+ int crtc_id;
+ u32 target_vblank;
+ uint64_t base;
+ struct drm_pending_vblank_event *event;
+ struct amdgpu_bo *old_abo;
+ unsigned shared_count;
+ struct dma_fence **shared;
+ struct dma_fence_cb cb;
+ bool async;
+};
+
+/*
+ * file private structure
+ */
+
+struct amdgpu_fpriv {
+ struct amdgpu_vm vm;
+ struct amdgpu_bo_va *prt_va;
+ struct amdgpu_bo_va *csa_va;
+ struct amdgpu_bo_va *seq64_va;
+ struct mutex bo_list_lock;
+ struct idr bo_list_handles;
+ struct amdgpu_ctx_mgr ctx_mgr;
+ struct amdgpu_userq_mgr userq_mgr;
+
+ /* Eviction fence infra */
+ struct amdgpu_eviction_fence_mgr evf_mgr;
+
+ /** GPU partition selection */
+ uint32_t xcp_id;
+};
+
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+
+/*
+ * Writeback
+ */
+#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
+
+/**
+ * amdgpu_wb - This struct is used for small GPU memory allocation.
+ *
+ * This struct is used to allocate a small amount of GPU memory that can be
+ * used to shadow certain states into memory. This is especially useful for
+ * providing easy CPU access to some states without requiring register access
+ * (e.g., if some block is power gated, reading a register may be problematic).
+ *
+ * Note: the term writeback was initially used because many of the amdgpu
+ * components had some level of writeback memory, and this struct initially
+ * described those components.
+ */
+struct amdgpu_wb {
+
+ /**
+ * @wb_obj:
+ *
+ * Buffer Object used for the writeback memory.
+ */
+ struct amdgpu_bo *wb_obj;
+
+ /**
+ * @wb:
+ *
+ * Pointer to the first writeback slot. In terms of CPU address
+ * this value can be accessed directly by using the offset as an index.
+ * For the GPU address, it is necessary to use gpu_addr and the offset.
+ */
+ uint32_t *wb;
+
+ /**
+ * @gpu_addr:
+ *
+ * Writeback base address in the GPU.
+ */
+ uint64_t gpu_addr;
+
+ /**
+ * @num_wb:
+ *
+ * Number of writeback slots reserved for amdgpu.
+ */
+ u32 num_wb;
+
+ /**
+ * @used:
+ *
+ * Track which writeback slots are already in use.
+ */
+ unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+
+ /**
+ * @lock:
+ *
+ * Protects read and write of the used field array.
+ */
+ spinlock_t lock;
+};
+
+int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
+void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
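+
+/*
+ * Typical use of the writeback helpers above (a sketch; the dword-to-byte
+ * scaling of the GPU address is an assumption based on the field
+ * descriptions in struct amdgpu_wb):
+ *
+ *	u32 wb_index;
+ *
+ *	if (!amdgpu_device_wb_get(adev, &wb_index)) {
+ *		u64 gpu_addr = adev->wb.gpu_addr + wb_index * 4;  // GPU view
+ *		u32 val = le32_to_cpu(adev->wb.wb[wb_index]);     // CPU view
+ *		...
+ *		amdgpu_device_wb_free(adev, wb_index);
+ *	}
+ */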
+
+/*
+ * Benchmarking
+ */
+int amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
+
+/*
+ * ASIC specific register table accessible by UMD
+ */
+struct amdgpu_allowed_register_entry {
+ uint32_t reg_offset;
+ bool grbm_indexed;
+};
+
+/**
+ * enum amd_reset_method - Methods for resetting AMD GPU devices
+ *
+ * @AMD_RESET_METHOD_NONE: The device will not be reset.
+ * @AMD_RESET_METHOD_LEGACY: Method reserved for SI, CIK and VI ASICs.
+ * @AMD_RESET_METHOD_MODE0: Reset the entire ASIC. Not currently available for
+ * any device.
+ * @AMD_RESET_METHOD_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN,
+ * etc.) individually. Suitable only for some discrete
+ * GPUs, not available for all ASICs.
+ * @AMD_RESET_METHOD_MODE2: Resets a lesser level of IPs compared to MODE1.
+ * Which IPs are reset depends on the ASIC. Notably
+ * doesn't reset IPs shared with the CPU on APUs or the
+ * memory controllers (so VRAM is not lost). Not
+ * available on all ASICs.
+ * @AMD_RESET_METHOD_LINK: Triggers SW-UP link reset on other GPUs
+ * @AMD_RESET_METHOD_BACO: BACO (Bus Alive, Chip Off) powers the card off and
+ * on, but without powering off the PCI bus. Suitable
+ * only for discrete GPUs.
+ * @AMD_RESET_METHOD_PCI: Does a full bus reset using the core Linux subsystem
+ * PCI reset and performs a secondary bus reset or FLR,
+ * depending on what the underlying hardware supports.
+ * @AMD_RESET_METHOD_ON_INIT: Reset performed as part of device initialization
+ * (e.g. the XGMI reset-on-init handler).
+ *
+ * Methods available to the AMD GPU driver for resetting the device. Not all
+ * methods are suitable for every device. The user can override the method
+ * using the module parameter `reset_method`.
+ */
+enum amd_reset_method {
+ AMD_RESET_METHOD_NONE = -1,
+ AMD_RESET_METHOD_LEGACY = 0,
+ AMD_RESET_METHOD_MODE0,
+ AMD_RESET_METHOD_MODE1,
+ AMD_RESET_METHOD_MODE2,
+ AMD_RESET_METHOD_LINK,
+ AMD_RESET_METHOD_BACO,
+ AMD_RESET_METHOD_PCI,
+ AMD_RESET_METHOD_ON_INIT,
+};
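+
+/*
+ * Illustrative selection of a reset method (mirrors what
+ * aldebaran_get_reset_handler() in this series does; amdgpu_asic_reset_method()
+ * wraps the per-ASIC reset_method callback):
+ *
+ *	if (reset_context->method == AMD_RESET_METHOD_NONE)
+ *		reset_context->method = amdgpu_asic_reset_method(adev);
+ *	// a handler registered for that method is then looked up and run
+ */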
+
+struct amdgpu_video_codec_info {
+ u32 codec_type;
+ u32 max_width;
+ u32 max_height;
+ u32 max_pixels_per_frame;
+ u32 max_level;
+};
+
+#define codec_info_build(type, width, height, level) \
+ .codec_type = type,\
+ .max_width = width,\
+ .max_height = height,\
+ .max_pixels_per_frame = height * width,\
+ .max_level = level,
+
+struct amdgpu_video_codecs {
+ const u32 codec_count;
+ const struct amdgpu_video_codec_info *codec_array;
+};
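+
+/*
+ * codec_info_build() expands to designated-initializer fragments, so a codec
+ * table can be declared like this (the codec index macro is taken from
+ * drm/amdgpu_drm.h; the width/height/level values are illustrative only):
+ *
+ *	static const struct amdgpu_video_codec_info decode_array[] = {
+ *		{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
+ *				  8192, 4352, 186)},
+ *	};
+ *
+ *	static const struct amdgpu_video_codecs decode_codecs = {
+ *		.codec_count = ARRAY_SIZE(decode_array),
+ *		.codec_array = decode_array,
+ *	};
+ */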
+
+/*
+ * ASIC specific functions.
+ */
+struct amdgpu_asic_funcs {
+ bool (*read_disabled_bios)(struct amdgpu_device *adev);
+ bool (*read_bios_from_rom)(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes);
+ int (*read_register)(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value);
+ void (*set_vga_state)(struct amdgpu_device *adev, bool state);
+ int (*reset)(struct amdgpu_device *adev);
+ enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
+ /* get the reference clock */
+ u32 (*get_xclk)(struct amdgpu_device *adev);
+ /* MM block clocks */
+ int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
+ int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
+ /* static power management */
+ int (*get_pcie_lanes)(struct amdgpu_device *adev);
+ void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
+ /* get config memsize register */
+ u32 (*get_config_memsize)(struct amdgpu_device *adev);
+ /* flush hdp write queue */
+ void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+ /* invalidate hdp read cache */
+ void (*invalidate_hdp)(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+ /* check if the asic needs a full reset or if soft reset will work */
+ bool (*need_full_reset)(struct amdgpu_device *adev);
+ /* initialize doorbell layout for specific asic */
+ void (*init_doorbell_index)(struct amdgpu_device *adev);
+ /* PCIe bandwidth usage */
+ void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1);
+ /* do we need to reset the asic at init time (e.g., kexec) */
+ bool (*need_reset_on_init)(struct amdgpu_device *adev);
+ /* PCIe replay counter */
+ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
+ /* device supports BACO */
+ int (*supports_baco)(struct amdgpu_device *adev);
+ /* pre asic_init quirks */
+ void (*pre_asic_init)(struct amdgpu_device *adev);
+ /* enter/exit umd stable pstate */
+ int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter);
+ /* query video codecs */
+ int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs);
+ /* encode "> 32bits" smn addressing */
+ u64 (*encode_ext_smn_addressing)(int ext_id);
+
+ ssize_t (*get_reg_state)(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size);
+};
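+
+/*
+ * These callbacks are normally reached through thin wrapper macros (e.g.
+ * amdgpu_asic_reset_method(adev), used by aldebaran_get_reset_handler()
+ * earlier in this series), which expand to adev->asic_funcs-><callback>(adev);
+ * the exact macro set is assumed to be defined later in this header.
+ */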
+
+/*
+ * IOCTL.
+ */
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+/* VRAM scratch page for HDP bug, default vram page */
+struct amdgpu_mem_scratch {
+ struct amdgpu_bo *robj;
+ uint32_t *ptr;
+ u64 gpu_addr;
+};
+
+/*
+ * CGS
+ */
+struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
+
+/*
+ * Core structure, functions and helpers.
+ */
+typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
+typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+
+typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t);
+
+typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t);
+typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t);
+
+typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device*, uint64_t);
+typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device*, uint64_t, uint64_t);
+
+typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
+typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
+
+struct amdgpu_mmio_remap {
+ u32 reg_offset;
+ resource_size_t bus_addr;
+ struct amdgpu_bo *bo;
+};
+
+/* Define the HW IP blocks that will be used in the driver; add more if necessary */
+enum amd_hw_ip_block_type {
+ GC_HWIP = 1,
+ HDP_HWIP,
+ SDMA0_HWIP,
+ SDMA1_HWIP,
+ SDMA2_HWIP,
+ SDMA3_HWIP,
+ SDMA4_HWIP,
+ SDMA5_HWIP,
+ SDMA6_HWIP,
+ SDMA7_HWIP,
+ LSDMA_HWIP,
+ MMHUB_HWIP,
+ ATHUB_HWIP,
+ NBIO_HWIP,
+ MP0_HWIP,
+ MP1_HWIP,
+ UVD_HWIP,
+ VCN_HWIP = UVD_HWIP,
+ JPEG_HWIP = VCN_HWIP,
+ VCN1_HWIP,
+ VCE_HWIP,
+ VPE_HWIP,
+ DF_HWIP,
+ DCE_HWIP,
+ OSSSYS_HWIP,
+ SMUIO_HWIP,
+ PWR_HWIP,
+ NBIF_HWIP,
+ THM_HWIP,
+ CLK_HWIP,
+ UMC_HWIP,
+ RSMU_HWIP,
+ XGMI_HWIP,
+ DCI_HWIP,
+ PCIE_HWIP,
+ ISP_HWIP,
+ MAX_HWIP
+};
+
+#define HWIP_MAX_INSTANCE 44
+
+#define HW_ID_MAX 300
+#define IP_VERSION_FULL(mj, mn, rv, var, srev) \
+ (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev))
+#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0)
+#define IP_VERSION_MAJ(ver) ((ver) >> 24)
+#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF)
+#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF)
+#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF)
+#define IP_VERSION_SUBREV(ver) ((ver) & 0xF)
+#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8)
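+
+/*
+ * Worked example of the packing above: IP_VERSION(13, 0, 2) encodes to
+ * (13 << 24) | (0 << 16) | (2 << 8) | (0 << 4) | 0 = 0x0D000200, so
+ * IP_VERSION_MAJ(0x0D000200) == 13, IP_VERSION_REV(0x0D000200) == 2, and
+ * IP_VERSION_MAJ_MIN_REV() simply drops the variant/subrevision byte.
+ */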
+
+struct amdgpu_ip_map_info {
+ /* Map of logical to actual dev instances/mask */
+ uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE];
+ int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst);
+ uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask);
+};
+
+enum amdgpu_uid_type {
+ AMDGPU_UID_TYPE_XCD,
+ AMDGPU_UID_TYPE_AID,
+ AMDGPU_UID_TYPE_SOC,
+ AMDGPU_UID_TYPE_MAX
+};
+
+#define AMDGPU_UID_INST_MAX 8 /* max number of instances for each UID type */
+
+struct amdgpu_uid {
+ uint64_t uid[AMDGPU_UID_TYPE_MAX][AMDGPU_UID_INST_MAX];
+ struct amdgpu_device *adev;
+};
+
+struct amd_powerplay {
+ void *pp_handle;
+ const struct amd_pm_funcs *pp_funcs;
+};
+
+/* polaris10 kickers */
+#define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \
+ ((rid == 0xE3) || \
+ (rid == 0xE4) || \
+ (rid == 0xE5) || \
+ (rid == 0xE7) || \
+ (rid == 0xEF))) || \
+ ((did == 0x6FDF) && \
+ ((rid == 0xE7) || \
+ (rid == 0xEF) || \
+ (rid == 0xFF))))
+
+#define ASICID_IS_P30(did, rid) ((did == 0x67DF) && \
+ ((rid == 0xE1) || \
+ (rid == 0xF7)))
+
+/* polaris11 kickers */
+#define ASICID_IS_P21(did, rid) (((did == 0x67EF) && \
+ ((rid == 0xE0) || \
+ (rid == 0xE5))) || \
+ ((did == 0x67FF) && \
+ ((rid == 0xCF) || \
+ (rid == 0xEF) || \
+ (rid == 0xFF))))
+
+#define ASICID_IS_P31(did, rid) ((did == 0x67EF) && \
+ ((rid == 0xE2)))
+
+/* polaris12 kickers */
+#define ASICID_IS_P23(did, rid) (((did == 0x6987) && \
+ ((rid == 0xC0) || \
+ (rid == 0xC1) || \
+ (rid == 0xC3) || \
+ (rid == 0xC7))) || \
+ ((did == 0x6981) && \
+ ((rid == 0x00) || \
+ (rid == 0x01) || \
+ (rid == 0x10))))
+
+struct amdgpu_mqd_prop {
+ uint64_t mqd_gpu_addr;
+ uint64_t hqd_base_gpu_addr;
+ uint64_t rptr_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ uint32_t queue_size;
+ bool use_doorbell;
+ uint32_t doorbell_index;
+ uint64_t eop_gpu_addr;
+ uint32_t hqd_pipe_priority;
+ uint32_t hqd_queue_priority;
+ bool allow_tunneling;
+ bool hqd_active;
+ uint64_t shadow_addr;
+ uint64_t gds_bkup_addr;
+ uint64_t csa_addr;
+ uint64_t fence_address;
+ bool tmz_queue;
+ bool kernel_queue;
+};
+
+struct amdgpu_mqd {
+ unsigned mqd_size;
+ int (*init_mqd)(struct amdgpu_device *adev, void *mqd,
+ struct amdgpu_mqd_prop *p);
+};
+
+struct amdgpu_pcie_reset_ctx {
+ bool in_link_reset;
+ bool occurs_dpc;
+ bool audio_suspended;
+ struct pci_dev *swus;
+ struct pci_saved_state *swus_pcistate;
+ struct pci_saved_state *swds_pcistate;
+};
+
+/*
+ * Custom init levels can be defined for situations where a full
+ * initialization of all hardware blocks is not expected. Sample cases are
+ * custom init sequences after resume from S0i3/S3, reset on initialization,
+ * partial reset of blocks, etc. The levels are described in the
+ * corresponding struct definitions - amdgpu_init_default,
+ * amdgpu_init_minimal_xgmi.
+ */
+enum amdgpu_init_lvl_id {
+ AMDGPU_INIT_LEVEL_DEFAULT,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+};
+
+struct amdgpu_init_level {
+ enum amdgpu_init_lvl_id level;
+ uint32_t hwini_ip_block_mask;
+};
+
+#define AMDGPU_RESET_MAGIC_NUM 64
+#define AMDGPU_MAX_DF_PERFMONS 4
+struct amdgpu_reset_domain;
+struct amdgpu_fru_info;
+
+enum amdgpu_enforce_isolation_mode {
+ AMDGPU_ENFORCE_ISOLATION_DISABLE = 0,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE = 1,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2,
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3,
+};
+
+struct amdgpu_device {
+ struct device *dev;
+ struct pci_dev *pdev;
+ struct drm_device ddev;
+
+#ifdef CONFIG_DRM_AMD_ACP
+ struct amdgpu_acp acp;
+#endif
+ struct amdgpu_hive_info *hive;
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ /* ASIC */
+ enum amd_asic_type asic_type;
+ uint32_t family;
+ uint32_t rev_id;
+ uint32_t external_rev_id;
+ unsigned long flags;
+ unsigned long apu_flags;
+ int usec_timeout;
+ const struct amdgpu_asic_funcs *asic_funcs;
+ bool shutdown;
+ bool need_swiotlb;
+ bool accel_working;
+ struct notifier_block acpi_nb;
+ struct notifier_block pm_nb;
+ struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
+ struct debugfs_blob_wrapper debugfs_vbios_blob;
+ struct mutex srbm_mutex;
+ /* GRBM index mutex. Protects concurrent access to GRBM index */
+ struct mutex grbm_idx_mutex;
+ struct dev_pm_domain vga_pm_domain;
+ bool have_disp_power_ref;
+ bool have_atomics_support;
+
+ /* BIOS */
+ bool is_atom_fw;
+ uint8_t *bios;
+ uint32_t bios_size;
+ uint32_t bios_scratch_reg_offset;
+ uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
+
+ /* Register/doorbell mmio */
+ resource_size_t rmmio_base;
+ resource_size_t rmmio_size;
+ void __iomem *rmmio;
+ /* protects concurrent MM_INDEX/DATA based register access */
+ spinlock_t mmio_idx_lock;
+ struct amdgpu_mmio_remap rmmio_remap;
+ /* protects concurrent SMC based register access */
+ spinlock_t smc_idx_lock;
+ amdgpu_rreg_t smc_rreg;
+ amdgpu_wreg_t smc_wreg;
+ /* protects concurrent PCIE register access */
+ spinlock_t pcie_idx_lock;
+ amdgpu_rreg_t pcie_rreg;
+ amdgpu_wreg_t pcie_wreg;
+ amdgpu_rreg_t pciep_rreg;
+ amdgpu_wreg_t pciep_wreg;
+ amdgpu_rreg_ext_t pcie_rreg_ext;
+ amdgpu_wreg_ext_t pcie_wreg_ext;
+ amdgpu_rreg64_t pcie_rreg64;
+ amdgpu_wreg64_t pcie_wreg64;
+ amdgpu_rreg64_ext_t pcie_rreg64_ext;
+ amdgpu_wreg64_ext_t pcie_wreg64_ext;
+ /* protects concurrent UVD register access */
+ spinlock_t uvd_ctx_idx_lock;
+ amdgpu_rreg_t uvd_ctx_rreg;
+ amdgpu_wreg_t uvd_ctx_wreg;
+ /* protects concurrent DIDT register access */
+ spinlock_t didt_idx_lock;
+ amdgpu_rreg_t didt_rreg;
+ amdgpu_wreg_t didt_wreg;
+ /* protects concurrent gc_cac register access */
+ spinlock_t gc_cac_idx_lock;
+ amdgpu_rreg_t gc_cac_rreg;
+ amdgpu_wreg_t gc_cac_wreg;
+ /* protects concurrent se_cac register access */
+ spinlock_t se_cac_idx_lock;
+ amdgpu_rreg_t se_cac_rreg;
+ amdgpu_wreg_t se_cac_wreg;
+ /* protects concurrent ENDPOINT (audio) register access */
+ spinlock_t audio_endpt_idx_lock;
+ amdgpu_block_rreg_t audio_endpt_rreg;
+ amdgpu_block_wreg_t audio_endpt_wreg;
+ struct amdgpu_doorbell doorbell;
+
+ /* clock/pll info */
+ struct amdgpu_clock clock;
+
+ /* MC */
+ struct amdgpu_gmc gmc;
+ struct amdgpu_gart gart;
+ dma_addr_t dummy_page_addr;
+ struct amdgpu_vm_manager vm_manager;
+ struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
+ DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS);
+
+ /* memory management */
+ struct amdgpu_mman mman;
+ struct amdgpu_mem_scratch mem_scratch;
+ struct amdgpu_wb wb;
+ atomic64_t num_bytes_moved;
+ atomic64_t num_evictions;
+ atomic64_t num_vram_cpu_page_faults;
+ atomic_t gpu_reset_counter;
+ atomic_t vram_lost_counter;
+
+ /* data for buffer migration throttling */
+ struct {
+ spinlock_t lock;
+ s64 last_update_us;
+ s64 accum_us; /* accumulated microseconds */
+ s64 accum_us_vis; /* for visible VRAM */
+ u32 log2_max_MBps;
+ } mm_stats;
+
+ /* discovery*/
+ struct amdgpu_discovery_info discovery;
+
+ /* display */
+ bool enable_virtual_display;
+ struct amdgpu_vkms_output *amdgpu_vkms_output;
+ struct amdgpu_mode_info mode_info;
+ /* For pre-DCE11. DCE11 and later are in "struct amdgpu_device->dm" */
+ struct delayed_work hotplug_work;
+ struct amdgpu_irq_src crtc_irq;
+ struct amdgpu_irq_src vline0_irq;
+ struct amdgpu_irq_src vupdate_irq;
+ struct amdgpu_irq_src pageflip_irq;
+ struct amdgpu_irq_src hpd_irq;
+ struct amdgpu_irq_src dmub_trace_irq;
+ struct amdgpu_irq_src dmub_outbox_irq;
+
+ /* rings */
+ u64 fence_context;
+ unsigned num_rings;
+ struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+ struct dma_fence __rcu *gang_submit;
+ bool ib_pool_ready;
+ struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX];
+ struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+
+ /* interrupts */
+ struct amdgpu_irq irq;
+
+ /* powerplay */
+ struct amd_powerplay powerplay;
+ struct amdgpu_pm pm;
+ u64 cg_flags;
+ u32 pg_flags;
+
+ /* nbio */
+ struct amdgpu_nbio nbio;
+
+ /* hdp */
+ struct amdgpu_hdp hdp;
+
+ /* smuio */
+ struct amdgpu_smuio smuio;
+
+ /* mmhub */
+ struct amdgpu_mmhub mmhub;
+
+ /* gfxhub */
+ struct amdgpu_gfxhub gfxhub;
+
+ /* gfx */
+ struct amdgpu_gfx gfx;
+
+ /* sdma */
+ struct amdgpu_sdma sdma;
+
+ /* lsdma */
+ struct amdgpu_lsdma lsdma;
+
+ /* uvd */
+ struct amdgpu_uvd uvd;
+
+ /* vce */
+ struct amdgpu_vce vce;
+
+ /* vcn */
+ struct amdgpu_vcn vcn;
+
+ /* jpeg */
+ struct amdgpu_jpeg jpeg;
+
+ /* vpe */
+ struct amdgpu_vpe vpe;
+
+ /* umsch */
+ struct amdgpu_umsch_mm umsch_mm;
+ bool enable_umsch_mm;
+
+ /* firmwares */
+ struct amdgpu_firmware firmware;
+
+ /* PSP */
+ struct psp_context psp;
+
+ /* GDS */
+ struct amdgpu_gds gds;
+
+ /* for userq and VM fences */
+ struct amdgpu_seq64 seq64;
+
+ /* UMC */
+ struct amdgpu_umc umc;
+
+ /* display related functionality */
+ struct amdgpu_display_manager dm;
+
+#if defined(CONFIG_DRM_AMD_ISP)
+ /* isp */
+ struct amdgpu_isp isp;
+#endif
+
+ /* mes */
+ bool enable_mes;
+ bool enable_mes_kiq;
+ bool enable_uni_mes;
+ struct amdgpu_mes mes;
+ struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
+ const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM];
+
+ /* xarray used to retrieve the user queue fence driver reference
+ * in the EOP interrupt handler to signal the particular user
+ * queue fence.
+ */
+ struct xarray userq_xa;
+ /**
+ * @userq_doorbell_xa: Global user queue map (doorbell index → queue)
+ * Key: doorbell_index (unique global identifier for the queue)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_doorbell_xa;
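+ /*
+ * A queue can then be resolved from its doorbell index with a plain
+ * xarray lookup (a sketch using the stock xarray API):
+ *
+ *	struct amdgpu_usermode_queue *queue =
+ *		xa_load(&adev->userq_doorbell_xa, doorbell_index);
+ */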
+
+ /* df */
+ struct amdgpu_df df;
+
+ /* MCA */
+ struct amdgpu_mca mca;
+
+ /* ACA */
+ struct amdgpu_aca aca;
+
+ /* CPER */
+ struct amdgpu_cper cper;
+
+ struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
+ uint32_t harvest_ip_mask;
+ int num_ip_blocks;
+ struct mutex mn_lock;
+ DECLARE_HASHTABLE(mn_hash, 7);
+
+ /* tracking pinned memory */
+ atomic64_t vram_pin_size;
+ atomic64_t visible_pin_size;
+ atomic64_t gart_pin_size;
+
+ /* soc15 register offset based on ip, instance and segment */
+ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+ struct amdgpu_ip_map_info ip_map;
+
+ /* delayed work_func for deferring clockgating during resume */
+ struct delayed_work delayed_init_work;
+
+ struct amdgpu_virt virt;
+
+ /* record whether a hw reset was performed */
+ bool has_hw_reset;
+ u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
+
+ /* s3/s4 mask */
+ bool in_suspend;
+ bool in_s3;
+ bool in_s4;
+ bool in_s0ix;
+ suspend_state_t last_suspend_state;
+
+ enum pp_mp1_state mp1_state;
+ struct amdgpu_doorbell_index doorbell_index;
+
+ struct mutex notifier_lock;
+
+ int asic_reset_res;
+ struct work_struct xgmi_reset_work;
+ struct list_head reset_list;
+
+ long gfx_timeout;
+ long sdma_timeout;
+ long video_timeout;
+ long compute_timeout;
+ long psp_timeout;
+
+ uint64_t unique_id;
+ uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* enable runtime pm on the device */
+ bool in_runpm;
+ bool has_pr3;
+
+ bool ucode_sysfs_en;
+
+ struct amdgpu_fru_info *fru_info;
+ atomic_t throttling_logging_enabled;
+ struct ratelimit_state throttling_logging_rs;
+ uint32_t ras_hw_enabled;
+ uint32_t ras_enabled;
+ bool ras_default_ecc_enabled;
+
+ bool no_hw_access;
+ struct pci_saved_state *pci_state;
+ pci_channel_state_t pci_channel_state;
+
+ struct amdgpu_pcie_reset_ctx pcie_reset_ctx;
+
+ /* Track auto wait count on s_barrier settings */
+ bool barrier_has_auto_waitcnt;
+
+ struct amdgpu_reset_control *reset_cntl;
+ uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
+
+ bool ram_is_direct_mapped;
+
+ struct list_head ras_list;
+
+ struct amdgpu_reset_domain *reset_domain;
+
+ struct mutex benchmark_mutex;
+
+ bool scpm_enabled;
+ uint32_t scpm_status;
+
+ struct work_struct reset_work;
+
+ bool dc_enabled;
+ /* Mask of active clusters */
+ uint32_t aid_mask;
+
+ /* Debug */
+ bool debug_vm;
+ bool debug_largebar;
+ bool debug_disable_soft_recovery;
+ bool debug_use_vram_fw_buf;
+ bool debug_enable_ras_aca;
+ bool debug_exp_resets;
+ bool debug_disable_gpu_ring_reset;
+ bool debug_vm_userptr;
+ bool debug_disable_ce_logs;
+ bool debug_enable_ce_cs;
+
+ /* Protection for the following isolation structure */
+ struct mutex enforce_isolation_mutex;
+ enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP];
+ struct amdgpu_isolation {
+ void *owner;
+ struct dma_fence *spearhead;
+ struct amdgpu_sync active;
+ struct amdgpu_sync prev;
+ } isolation[MAX_XCP];
+
+ struct amdgpu_init_level *init_lvl;
+
+ /* This flag is used to determine how VRAM allocations are handled for APUs
+ * in KFD: VRAM or GTT.
+ */
+ bool apu_prefer_gtt;
+
+ bool userq_halt_for_enforce_isolation;
+ struct work_struct userq_reset_work;
+ struct amdgpu_uid *uid_info;
+
+ /* KFD
+ * Must be last --ends in a flexible-array member.
+ */
+ struct amdgpu_kfd_dev kfd;
+};
+
+static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
+ uint8_t ip, uint8_t inst)
+{
+ /* This considers only major/minor/rev and ignores
+ * subrevision/variant fields.
+ */
+ return adev->ip_versions[ip][inst] & ~0xFFU;
+}
+
+static inline uint32_t amdgpu_ip_version_full(const struct amdgpu_device *adev,
+ uint8_t ip, uint8_t inst)
+{
+ /* This returns full version - major/minor/rev/variant/subrevision */
+ return adev->ip_versions[ip][inst];
+}
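+
+/*
+ * Example: because amdgpu_ip_version() masks off the low byte, a check such as
+ *
+ *	amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2)
+ *
+ * matches any 13.0.2 part regardless of variant/subrevision, whereas
+ * amdgpu_ip_version_full() must be compared against IP_VERSION_FULL() values.
+ */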
+
+static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+{
+ return container_of(ddev, struct amdgpu_device, ddev);
+}
+
+static inline struct drm_device *adev_to_drm(struct amdgpu_device *adev)
+{
+ return &adev->ddev;
+}
+
+static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev)
+{
+ return container_of(bdev, struct amdgpu_device, mman.bdev);
+}
+
+static inline bool amdgpu_is_multi_aid(struct amdgpu_device *adev)
+{
+ return !!adev->aid_mask;
+}
+
+int amdgpu_device_init(struct amdgpu_device *adev,
+ uint32_t flags);
+void amdgpu_device_fini_hw(struct amdgpu_device *adev);
+void amdgpu_device_fini_sw(struct amdgpu_device *adev);
+
+int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
+
+void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write);
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask);
+uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags);
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags,
+ uint32_t xcc_id);
+void amdgpu_device_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags);
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data);
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags,
+ uint32_t xcc_id);
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v, uint32_t xcc_id);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
+u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
+ u32 reg_addr);
+u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
+ u32 reg_addr);
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr);
+void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
+ u32 reg_addr, u32 reg_data);
+void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
+ u32 reg_addr, u64 reg_data);
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data);
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev);
+bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
+ enum amd_asic_type asic_type);
+bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
+
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev);
+
+int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context);
+
+int emu_soc_asic_init(struct amdgpu_device *adev);
+
+/*
+ * Registers read & write functions.
+ */
+#define AMDGPU_REGS_NO_KIQ (1<<1)
+#define AMDGPU_REGS_RLC (1<<2)
+
+#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
+#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
+
+#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
+#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
+
+#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
+#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
+
+#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0))
+#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
+#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
+#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
+#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst)
+#define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
+#define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
+#define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
+#define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v))
+#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg))
+#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v))
+#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg))
+#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))
+#define RREG64_PCIE_EXT(reg) adev->pcie_rreg64_ext(adev, (reg))
+#define WREG64_PCIE_EXT(reg, v) adev->pcie_wreg64_ext(adev, (reg), (v))
+#define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))
+#define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))
+#define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg))
+#define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v))
+#define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg))
+#define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
+#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
+#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
+#define RREG32_SE_CAC(reg) adev->se_cac_rreg(adev, (reg))
+#define WREG32_SE_CAC(reg, v) adev->se_cac_wreg(adev, (reg), (v))
+#define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
+#define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
+#define WREG32_P(reg, val, mask) \
+ do { \
+ uint32_t tmp_ = RREG32(reg); \
+ tmp_ &= (mask); \
+ tmp_ |= ((val) & ~(mask)); \
+ WREG32(reg, tmp_); \
+ } while (0)
+#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
+#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
+#define WREG32_PLL_P(reg, val, mask) \
+ do { \
+ uint32_t tmp_ = RREG32_PLL(reg); \
+ tmp_ &= (mask); \
+ tmp_ |= ((val) & ~(mask)); \
+ WREG32_PLL(reg, tmp_); \
+ } while (0)
+
+#define WREG32_SMC_P(_Reg, _Val, _Mask) \
+ do { \
+ u32 tmp = RREG32_SMC(_Reg); \
+ tmp &= (_Mask); \
+ tmp |= ((_Val) & ~(_Mask)); \
+ WREG32_SMC(_Reg, tmp); \
+ } while (0)
+
+#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false))
+
+#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
+#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
+
+#define REG_SET_FIELD(orig_val, reg, field, field_val) \
+ (((orig_val) & ~REG_FIELD_MASK(reg, field)) | \
+ (REG_FIELD_MASK(reg, field) & ((field_val) << REG_FIELD_SHIFT(reg, field))))
+
+#define REG_GET_FIELD(value, reg, field) \
+ (((value) & REG_FIELD_MASK(reg, field)) >> REG_FIELD_SHIFT(reg, field))
+
+#define WREG32_FIELD(reg, field, val) \
+ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
+#define WREG32_FIELD_OFFSET(reg, offset, field, val) \
+ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
+#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l))
+/*
+ * BIOS helpers.
+ */
+#define RBIOS8(i) (adev->bios[i])
+#define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
+#define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
+
+/*
+ * ASICs macro.
+ */
+#define amdgpu_asic_set_vga_state(adev, state) \
+ ((adev)->asic_funcs->set_vga_state ? (adev)->asic_funcs->set_vga_state((adev), (state)) : 0)
+#define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
+#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))
+#define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
+#define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
+#define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
+#define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
+#define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l))
+#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
+#define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
+#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
+#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
+#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
+#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
+#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
+#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
+#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
+#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
+#define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev))
+#define amdgpu_asic_pre_asic_init(adev) (adev)->asic_funcs->pre_asic_init((adev))
+#define amdgpu_asic_update_umd_stable_pstate(adev, enter) \
+ ((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0)
+#define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c))
+
+#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter))
+
+#define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i))
+#define for_each_inst(i, inst_mask) \
+ for (i = ffs(inst_mask); i-- != 0; \
+ i = ffs(inst_mask & BIT_MASK_UPPER(i + 1)))
+
+/* Common functions */
+bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
+bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context);
+void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
+int amdgpu_device_pci_reset(struct amdgpu_device *adev);
+bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
+bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
+
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+ u64 num_vis_bytes);
+int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
+void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
+ const u32 *registers,
+ const u32 array_size);
+
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
+int amdgpu_device_link_reset(struct amdgpu_device *adev);
+bool amdgpu_device_supports_atpx(struct amdgpu_device *adev);
+bool amdgpu_device_supports_px(struct amdgpu_device *adev);
+bool amdgpu_device_supports_boco(struct amdgpu_device *adev);
+bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev);
+int amdgpu_device_supports_baco(struct amdgpu_device *adev);
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev);
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
+int amdgpu_device_baco_enter(struct amdgpu_device *adev);
+int amdgpu_device_baco_exit(struct amdgpu_device *adev);
+
+void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+
+void amdgpu_device_halt(struct amdgpu_device *adev);
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
+ u32 reg);
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
+ u32 reg, u32 v);
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
+
+/* atpx handler */
+#if defined(CONFIG_VGA_SWITCHEROO)
+void amdgpu_register_atpx_handler(void);
+void amdgpu_unregister_atpx_handler(void);
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+bool amdgpu_is_atpx_hybrid(void);
+bool amdgpu_has_atpx(void);
+#else
+static inline void amdgpu_register_atpx_handler(void) {}
+static inline void amdgpu_unregister_atpx_handler(void) {}
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
+static inline bool amdgpu_has_atpx(void) { return false; }
+#endif
+
+/*
+ * KMS
+ */
+extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
+extern const int amdgpu_max_kms_ioctl;
+
+int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags);
+void amdgpu_driver_unload_kms(struct drm_device *dev);
+int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
+void amdgpu_driver_postclose_kms(struct drm_device *dev,
+ struct drm_file *file_priv);
+void amdgpu_driver_release_kms(struct drm_device *dev);
+
+int amdgpu_device_prepare(struct drm_device *dev);
+void amdgpu_device_complete(struct drm_device *dev);
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon);
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon);
+u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc);
+int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
+void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
+int amdgpu_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+/*
+ * functions used by amdgpu_encoder.c
+ */
+struct amdgpu_afmt_acr {
+ u32 clock;
+
+ int n_32khz;
+ int cts_32khz;
+
+ int n_44_1khz;
+ int cts_44_1khz;
+
+ int n_48khz;
+ int cts_48khz;
+
+};
+
+struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock);
+
+/* amdgpu_acpi.c */
+
+struct amdgpu_numa_info {
+ uint64_t size;
+ int pxm;
+ int nid;
+};
+
+/* ATCS Device/Driver State */
+#define AMDGPU_ATCS_PSC_DEV_STATE_D0 0
+#define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3
+#define AMDGPU_ATCS_PSC_DRV_STATE_OPR 0
+#define AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR 1
+
+#if defined(CONFIG_ACPI)
+int amdgpu_acpi_init(struct amdgpu_device *adev);
+void amdgpu_acpi_fini(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_power_shift_control_supported(void);
+int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
+ u8 perf_req, bool advertise);
+int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state);
+int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state);
+int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size);
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info);
+
+void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps);
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
+void amdgpu_acpi_detect(void);
+void amdgpu_acpi_release(void);
+#else
+static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
+static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev,
+ u64 *tmr_offset, u64 *tmr_size)
+{
+ return -EINVAL;
+}
+static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev,
+ int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ return -EINVAL;
+}
+static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
+static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
+static inline void amdgpu_acpi_detect(void) { }
+static inline void amdgpu_acpi_release(void) { }
+static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; }
+static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state) { return 0; }
+static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state)
+{
+ return 0;
+}
+static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { }
+#endif
+
+#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+#else
+static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
+static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
+#endif
+
+#if defined(CONFIG_DRM_AMD_ISP)
+int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev);
+#endif
+
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
+
+pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state);
+pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev);
+pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev);
+void amdgpu_pci_resume(struct pci_dev *pdev);
+
+bool amdgpu_device_cache_pci_state(struct pci_dev *pdev);
+bool amdgpu_device_load_pci_state(struct pci_dev *pdev);
+
+bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev);
+
+int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
+ enum amd_powergating_state state);
+
+static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device *adev)
+{
+ return amdgpu_gpu_recovery != 0 &&
+ adev->gfx_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->compute_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->sdma_timeout != MAX_SCHEDULE_TIMEOUT &&
+ adev->video_timeout != MAX_SCHEDULE_TIMEOUT;
+}
+
+#include "amdgpu_object.h"
+
+static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
+{
+ return adev->gmc.tmz_enabled;
+}
+
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+extern const struct attribute_group amdgpu_flash_attr_group;
+
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl);
+
+static inline int amdgpu_device_bus_status_check(struct amdgpu_device *adev)
+{
+ u32 status;
+ int r;
+
+ r = pci_read_config_dword(adev->pdev, PCI_COMMAND, &status);
+ if (r || PCI_POSSIBLE_ERROR(status)) {
+ dev_err(adev->dev, "device lost from bus!");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst,
+ uint64_t uid);
+uint64_t amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst);
+#endif
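
The register helpers above read more easily with a concrete use next to them. The following sketch is editorial and not part of the patch: mmFOO_CTRL, the single-bit FOO_CTRL__ENABLE mask/shift pair and foo_set_enable() are hypothetical names standing in for a register and field from the generated ASIC headers; only RREG32/WREG32 and the REG_*_FIELD macros come from amdgpu.h above.

static void foo_set_enable(struct amdgpu_device *adev, bool enable)
{
	u32 tmp;

	/* classic read-modify-write of a single register field */
	tmp = RREG32(mmFOO_CTRL);                           /* plain MMIO read */
	tmp = REG_SET_FIELD(tmp, FOO_CTRL, ENABLE, enable); /* mask and shift the field */
	WREG32(mmFOO_CTRL, tmp);                            /* write the updated value */

	/* REG_GET_FIELD extracts the field back out of a raw register value */
	WARN_ON(REG_GET_FIELD(RREG32(mmFOO_CTRL), FOO_CTRL, ENABLE) != enable);
}

WREG32_FIELD(FOO_CTRL, ENABLE, 1) would collapse the same read-modify-write into one line, since the macro prepends the mm prefix to the register name itself.
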
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
new file mode 100644
index 000000000000..9b3180449150
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -0,0 +1,984 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/list.h>
+#include "amdgpu.h"
+#include "amdgpu_aca.h"
+#include "amdgpu_ras.h"
+
+#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
+
+typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+
+static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
+ ACA_BANK_HWID(SMU, 0x01, 0x01),
+ ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
+ ACA_BANK_HWID(UMC, 0x96, 0x00),
+};
+
+static void aca_banks_init(struct aca_banks *banks)
+{
+ if (!banks)
+ return;
+
+ memset(banks, 0, sizeof(*banks));
+ INIT_LIST_HEAD(&banks->list);
+}
+
+static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank)
+{
+ struct aca_bank_node *node;
+
+ if (!bank)
+ return -EINVAL;
+
+ node = kvzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ memcpy(&node->bank, bank, sizeof(*bank));
+
+ INIT_LIST_HEAD(&node->node);
+ list_add_tail(&node->node, &banks->list);
+
+ banks->nr_banks++;
+
+ return 0;
+}
+
+static void aca_banks_release(struct aca_banks *banks)
+{
+ struct aca_bank_node *node, *tmp;
+
+ if (list_empty(&banks->list))
+ return;
+
+ list_for_each_entry_safe(node, tmp, &banks->list, node) {
+ list_del(&node->node);
+ kvfree(node);
+ banks->nr_banks--;
+ }
+}
+
+static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+
+ if (!count)
+ return -EINVAL;
+
+ if (!smu_funcs || !smu_funcs->get_valid_aca_count)
+ return -EOPNOTSUPP;
+
+ return smu_funcs->get_valid_aca_count(adev, type, count);
+}
+
+static struct aca_regs_dump {
+ const char *name;
+ int reg_idx;
+} aca_regs[] = {
+ {"CONTROL", ACA_REG_IDX_CTL},
+ {"STATUS", ACA_REG_IDX_STATUS},
+ {"ADDR", ACA_REG_IDX_ADDR},
+ {"MISC", ACA_REG_IDX_MISC0},
+ {"CONFIG", ACA_REG_IDX_CONFIG},
+ {"IPID", ACA_REG_IDX_IPID},
+ {"SYND", ACA_REG_IDX_SYND},
+ {"DESTAT", ACA_REG_IDX_DESTAT},
+ {"DEADDR", ACA_REG_IDX_DEADDR},
+ {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
+};
+
+static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
+ struct ras_query_context *qctx)
+{
+ u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
+ int i;
+
+ if (adev->debug_disable_ce_logs &&
+ bank->smu_err_type == ACA_SMU_TYPE_CE &&
+ !ACA_BANK_ERR_IS_DEFFERED(bank))
+ return;
+
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+ /* plus 1 for output format, e.g.: ACA[08/08]: xxxx */
+ for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
+ idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
+
+ if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS]))
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n");
+}
+
+static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
+{
+ struct aca_hwip *hwip;
+ int hwid, mcatype;
+ u64 ipid;
+
+ if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
+ return false;
+
+ hwip = &aca_hwid_mcatypes[type];
+ if (!hwip->hwid)
+ return false;
+
+ ipid = bank->regs[ACA_REG_IDX_IPID];
+ hwid = ACA_REG__IPID__HARDWAREID(ipid);
+ mcatype = ACA_REG__IPID__MCATYPE(ipid);
+
+ return hwip->hwid == hwid && hwip->mcatype == mcatype;
+}
+
+static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
+ int start, int count,
+ struct aca_banks *banks, struct ras_query_context *qctx)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+ struct aca_bank bank;
+ int i, max_count, ret;
+
+ if (!count)
+ return 0;
+
+ if (!smu_funcs || !smu_funcs->get_valid_aca_bank)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ max_count = smu_funcs->max_ue_bank_count;
+ break;
+ case ACA_SMU_TYPE_CE:
+ max_count = smu_funcs->max_ce_bank_count;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (start + count > max_count)
+ return -EINVAL;
+
+ count = min_t(int, count, max_count);
+ for (i = 0; i < count; i++) {
+ memset(&bank, 0, sizeof(bank));
+ ret = smu_funcs->get_valid_aca_bank(adev, type, start + i, &bank);
+ if (ret)
+ return ret;
+
+ bank.smu_err_type = type;
+
+ /*
+ * Poison may be consumed when a UE is injected while background
+ * workloads are running; such banks are unexpected here, so skip them.
+ */
+ if (type == ACA_SMU_TYPE_UE &&
+ ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
+ !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
+ continue;
+
+ aca_smu_bank_dump(adev, i, count, &bank, qctx);
+
+ ret = aca_banks_add_bank(banks, &bank);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
+{
+ const struct aca_bank_ops *bank_ops = handle->bank_ops;
+
+ /* Parse all deferred errors with UMC aca handle */
+ if (ACA_BANK_ERR_IS_DEFFERED(bank))
+ return handle->hwip == ACA_HWIP_TYPE_UMC;
+
+ if (!aca_bank_hwip_is_matched(bank, handle->hwip))
+ return false;
+
+ if (!bank_ops->aca_bank_is_valid)
+ return true;
+
+ return bank_ops->aca_bank_is_valid(handle, bank, type, handle->data);
+}
+
+static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+ struct aca_bank_error *bank_error;
+
+ bank_error = kvzalloc(sizeof(*bank_error), GFP_KERNEL);
+ if (!bank_error)
+ return NULL;
+
+ INIT_LIST_HEAD(&bank_error->node);
+ memcpy(&bank_error->info, info, sizeof(*info));
+
+ mutex_lock(&aerr->lock);
+ list_add_tail(&bank_error->node, &aerr->list);
+ aerr->nr_errors++;
+ mutex_unlock(&aerr->lock);
+
+ return bank_error;
+}
+
+static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+ struct aca_bank_error *bank_error = NULL;
+ struct aca_bank_info *tmp_info;
+ bool found = false;
+
+ mutex_lock(&aerr->lock);
+ list_for_each_entry(bank_error, &aerr->list, node) {
+ tmp_info = &bank_error->info;
+ if (tmp_info->socket_id == info->socket_id &&
+ tmp_info->die_id == info->die_id) {
+ found = true;
+ goto out_unlock;
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&aerr->lock);
+
+ return found ? bank_error : NULL;
+}
+
+static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error)
+{
+ if (!aerr || !bank_error)
+ return;
+
+ list_del(&bank_error->node);
+ aerr->nr_errors--;
+
+ kvfree(bank_error);
+}
+
+static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info)
+{
+ struct aca_bank_error *bank_error;
+
+ if (!aerr || !info)
+ return NULL;
+
+ bank_error = find_bank_error(aerr, info);
+ if (bank_error)
+ return bank_error;
+
+ return new_bank_error(aerr, info);
+}
+
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count)
+{
+ struct aca_error_cache *error_cache = &handle->error_cache;
+ struct aca_bank_error *bank_error;
+ struct aca_error *aerr;
+
+ if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT)
+ return -EINVAL;
+
+ if (!count)
+ return 0;
+
+ aerr = &error_cache->errors[type];
+ bank_error = get_bank_error(aerr, info);
+ if (!bank_error)
+ return -ENOMEM;
+
+ bank_error->count += count;
+
+ return 0;
+}
+
+static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
+{
+ const struct aca_bank_ops *bank_ops = handle->bank_ops;
+
+ if (!bank)
+ return -EINVAL;
+
+ if (!bank_ops->aca_bank_parser)
+ return -EOPNOTSUPP;
+
+ return bank_ops->aca_bank_parser(handle, bank, type,
+ handle->data);
+}
+
+static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ int ret;
+
+ ret = aca_bank_parser(handle, bank, type);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
+ enum aca_smu_type type, bank_handler_t handler, void *data)
+{
+ struct aca_handle *handle;
+ int ret;
+
+ if (list_empty(&mgr->list))
+ return 0;
+
+ list_for_each_entry(handle, &mgr->list, node) {
+ if (!aca_bank_is_valid(handle, bank, type))
+ continue;
+
+ ret = handler(handle, bank, type, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
+ enum aca_smu_type type, bank_handler_t handler, void *data)
+{
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ int ret;
+
+ if (!mgr || !banks)
+ return -EINVAL;
+
+ /* pre check to avoid unnecessary operations */
+ if (list_empty(&mgr->list) || list_empty(&banks->list))
+ return 0;
+
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+
+ ret = aca_dispatch_bank(mgr, bank, type, handler, data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ bool ret = true;
+
+ /*
+ * Because the UE valid MCA count is only cleared after reset,
+ * the aca banks are updated only once during the gpu recovery stage
+ * to avoid counting the same errors repeatedly.
+ */
+ if (type == ACA_SMU_TYPE_UE) {
+ if (amdgpu_ras_intr_triggered())
+ ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0;
+ else
+ atomic_set(&aca->ue_update_flag, 0);
+ }
+
+ return ret;
+}
+
+static void aca_banks_generate_cper(struct amdgpu_device *adev,
+ enum aca_smu_type type,
+ struct aca_banks *banks,
+ int count)
+{
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ int r;
+
+ if (!adev->cper.enabled)
+ return;
+
+ if (!banks || !count) {
+ dev_warn(adev->dev, "fail to generate cper records\n");
+ return;
+ }
+
+ /* UEs must be encoded into separate CPER entries */
+ if (type == ACA_SMU_TYPE_UE) {
+ struct aca_banks de_banks;
+
+ aca_banks_init(&de_banks);
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ r = aca_banks_add_bank(&de_banks, bank);
+ if (r)
+ dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
+ } else {
+ if (amdgpu_cper_generate_ue_record(adev, bank))
+ dev_warn(adev->dev, "fail to generate ue cper records\n");
+ }
+ }
+
+ if (!list_empty(&de_banks.list)) {
+ if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
+ dev_warn(adev->dev, "fail to generate de cper records\n");
+ }
+
+ aca_banks_release(&de_banks);
+ } else {
+ /*
+ * SMU_TYPE_CE banks are combined into one CPER entry;
+ * they can be CEs, DEs, or both.
+ */
+ if (amdgpu_cper_generate_ce_records(adev, banks, count))
+ dev_warn(adev->dev, "fail to generate ce cper records\n");
+ }
+}
+
+static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
+ bank_handler_t handler, struct ras_query_context *qctx, void *data)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ struct aca_banks banks;
+ u32 count = 0;
+ int ret;
+
+ if (list_empty(&aca->mgr.list))
+ return 0;
+
+ if (!aca_bank_should_update(adev, type))
+ return 0;
+
+ ret = aca_smu_get_valid_aca_count(adev, type, &count);
+ if (ret)
+ return ret;
+
+ if (!count)
+ return 0;
+
+ aca_banks_init(&banks);
+
+ ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx);
+ if (ret)
+ goto err_release_banks;
+
+ if (list_empty(&banks.list)) {
+ ret = 0;
+ goto err_release_banks;
+ }
+
+ ret = aca_dispatch_banks(&aca->mgr, &banks, type,
+ handler, data);
+ if (ret)
+ goto err_release_banks;
+
+ aca_banks_generate_cper(adev, type, &banks, count);
+
+err_release_banks:
+ aca_banks_release(&banks);
+
+ return ret;
+}
+
+static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data)
+{
+ struct aca_bank_info *info;
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ u64 count;
+
+ if (type >= ACA_ERROR_TYPE_COUNT)
+ return -EINVAL;
+
+ count = bank_error->count;
+ if (!count)
+ return 0;
+
+ info = &bank_error->info;
+ mcm_info.die_id = info->die_id;
+ mcm_info.socket_id = info->socket_id;
+
+ switch (type) {
+ case ACA_ERROR_TYPE_UE:
+ amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count);
+ break;
+ case ACA_ERROR_TYPE_CE:
+ amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count);
+ break;
+ case ACA_ERROR_TYPE_DEFERRED:
+ amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data)
+{
+ struct aca_error_cache *error_cache = &handle->error_cache;
+ struct aca_error *aerr = &error_cache->errors[type];
+ struct aca_bank_error *bank_error, *tmp;
+
+ mutex_lock(&aerr->lock);
+
+ if (list_empty(&aerr->list))
+ goto out_unlock;
+
+ list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) {
+ aca_log_aca_error_data(bank_error, type, err_data);
+ aca_bank_error_remove(aerr, bank_error);
+ }
+
+out_unlock:
+ mutex_unlock(&aerr->lock);
+
+ return 0;
+}
+
+static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx)
+{
+ enum aca_smu_type smu_type;
+ int ret;
+
+ switch (type) {
+ case ACA_ERROR_TYPE_UE:
+ smu_type = ACA_SMU_TYPE_UE;
+ break;
+ case ACA_ERROR_TYPE_CE:
+ case ACA_ERROR_TYPE_DEFERRED:
+ smu_type = ACA_SMU_TYPE_CE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* update the aca banks into the aca handle's error_cache first */
+ ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
+ if (ret)
+ return ret;
+
+ /* DEs may be contained in CEs or UEs */
+ if (type != ACA_ERROR_TYPE_DEFERRED)
+ aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
+
+ return aca_log_aca_error(handle, type, err_data);
+}
+
+static bool aca_handle_is_valid(struct aca_handle *handle)
+{
+ if (!handle->mask || !list_empty(&handle->node))
+ return false;
+
+ return true;
+}
+
+int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ if (!handle || !err_data)
+ return -EINVAL;
+
+ if (aca_handle_is_valid(handle))
+ return -EOPNOTSUPP;
+
+ if ((type < 0) || (!(BIT(type) & handle->mask)))
+ return 0;
+
+ return __aca_get_error_data(adev, handle, type, err_data, qctx);
+}
+
+static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
+{
+ mutex_init(&aerr->lock);
+ INIT_LIST_HEAD(&aerr->list);
+ aerr->type = type;
+ aerr->nr_errors = 0;
+}
+
+static void aca_init_error_cache(struct aca_handle *handle)
+{
+ struct aca_error_cache *error_cache = &handle->error_cache;
+ int type;
+
+ for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++)
+ aca_error_init(&error_cache->errors[type], type);
+}
+
+static void aca_error_fini(struct aca_error *aerr)
+{
+ struct aca_bank_error *bank_error, *tmp;
+
+ mutex_lock(&aerr->lock);
+ if (list_empty(&aerr->list))
+ goto out_unlock;
+
+ list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
+ aca_bank_error_remove(aerr, bank_error);
+
+out_unlock:
+ mutex_destroy(&aerr->lock);
+}
+
+static void aca_fini_error_cache(struct aca_handle *handle)
+{
+ struct aca_error_cache *error_cache = &handle->error_cache;
+ int type;
+
+ for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++)
+ aca_error_fini(&error_cache->errors[type]);
+}
+
+static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle,
+ const char *name, const struct aca_info *ras_info, void *data)
+{
+ memset(handle, 0, sizeof(*handle));
+
+ handle->adev = adev;
+ handle->mgr = mgr;
+ handle->name = name;
+ handle->hwip = ras_info->hwip;
+ handle->mask = ras_info->mask;
+ handle->bank_ops = ras_info->bank_ops;
+ handle->data = data;
+ aca_init_error_cache(handle);
+
+ INIT_LIST_HEAD(&handle->node);
+ list_add_tail(&handle->node, &mgr->list);
+ mgr->nr_handles++;
+
+ return 0;
+}
+
+static ssize_t aca_sysfs_read(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct aca_handle *handle = container_of(attr, struct aca_handle, aca_attr);
+
+ /* NOTE: the aca cache is automatically cleared once it is read, so the driver
+ * should unify the query entry point and forward requests directly to the RAS query interface. */
+ return amdgpu_ras_aca_sysfs_read(dev, attr, handle, buf, handle->data);
+}
+
+static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle)
+{
+ struct device_attribute *aca_attr = &handle->aca_attr;
+
+ snprintf(handle->attr_name, sizeof(handle->attr_name) - 1, "aca_%s", handle->name);
+ aca_attr->show = aca_sysfs_read;
+ aca_attr->attr.name = handle->attr_name;
+ aca_attr->attr.mode = S_IRUGO;
+ sysfs_attr_init(&aca_attr->attr);
+
+ return sysfs_add_file_to_group(&adev->dev->kobj,
+ &aca_attr->attr,
+ "ras");
+}
+
+int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
+ const char *name, const struct aca_info *ras_info, void *data)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ int ret;
+
+ if (!amdgpu_aca_is_enabled(adev))
+ return 0;
+
+ ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data);
+ if (ret)
+ return ret;
+
+ return add_aca_sysfs(adev, handle);
+}
+
+static void remove_aca_handle(struct aca_handle *handle)
+{
+ struct aca_handle_manager *mgr = handle->mgr;
+
+ aca_fini_error_cache(handle);
+ list_del(&handle->node);
+ mgr->nr_handles--;
+}
+
+static void remove_aca_sysfs(struct aca_handle *handle)
+{
+ struct amdgpu_device *adev = handle->adev;
+ struct device_attribute *aca_attr = &handle->aca_attr;
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
+ &aca_attr->attr,
+ "ras");
+}
+
+void amdgpu_aca_remove_handle(struct aca_handle *handle)
+{
+ if (!handle || list_empty(&handle->node))
+ return;
+
+ remove_aca_sysfs(handle);
+ remove_aca_handle(handle);
+}
+
+static int aca_manager_init(struct aca_handle_manager *mgr)
+{
+ INIT_LIST_HEAD(&mgr->list);
+ mgr->nr_handles = 0;
+
+ return 0;
+}
+
+static void aca_manager_fini(struct aca_handle_manager *mgr)
+{
+ struct aca_handle *handle, *tmp;
+
+ if (list_empty(&mgr->list))
+ return;
+
+ list_for_each_entry_safe(handle, tmp, &mgr->list, node)
+ amdgpu_aca_remove_handle(handle);
+}
+
+bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
+{
+ return (adev->aca.is_enabled ||
+ adev->debug_enable_ras_aca);
+}
+
+int amdgpu_aca_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ int ret;
+
+ atomic_set(&aca->ue_update_flag, 0);
+
+ ret = aca_manager_init(&aca->mgr);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+void amdgpu_aca_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ aca_manager_fini(&aca->mgr);
+
+ atomic_set(&aca->ue_update_flag, 0);
+}
+
+int amdgpu_aca_reset(struct amdgpu_device *adev)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ atomic_set(&aca->ue_update_flag, 0);
+
+ return 0;
+}
+
+void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+
+ WARN_ON(aca->smu_funcs);
+ aca->smu_funcs = smu_funcs;
+}
+
+int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
+{
+ u64 ipid;
+ u32 instidhi, instidlo;
+
+ if (!bank || !info)
+ return -EINVAL;
+
+ ipid = bank->regs[ACA_REG_IDX_IPID];
+ info->hwid = ACA_REG__IPID__HARDWAREID(ipid);
+ info->mcatype = ACA_REG__IPID__MCATYPE(ipid);
+ /*
+ * Unified DieID Format: SAASS. A:AID, S:Socket.
+ * Unified DieID[4:4] = InstanceId[0:0]
+ * Unified DieID[0:3] = InstanceIdHi[0:3]
+ */
+ instidhi = ACA_REG__IPID__INSTANCEIDHI(ipid);
+ instidlo = ACA_REG__IPID__INSTANCEIDLO(ipid);
+ info->die_id = ((instidhi >> 2) & 0x03);
+ info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03);
+
+ return 0;
+}
+
+static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+
+ if (!smu_funcs || !smu_funcs->parse_error_code)
+ return -EOPNOTSUPP;
+
+ return smu_funcs->parse_error_code(adev, bank);
+}
+
+int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
+{
+ int i, error_code;
+
+ if (!bank || !err_codes)
+ return -EINVAL;
+
+ error_code = aca_bank_get_error_code(adev, bank);
+ if (error_code < 0)
+ return error_code;
+
+ for (i = 0; i < size; i++) {
+ if (err_codes[i] == error_code)
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
+
+ if (!smu_funcs || !smu_funcs->set_debug_mode)
+ return -EOPNOTSUPP;
+
+ return smu_funcs->set_debug_mode(adev, en);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ int ret;
+
+ ret = amdgpu_ras_set_aca_debug_mode(adev, val ? true : false);
+ if (ret)
+ return ret;
+
+ dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", val ? "on" : "off");
+
+ return 0;
+}
+
+static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx)
+{
+ struct aca_bank_info info;
+ int i, ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return;
+
+ seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE");
+ seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+ idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
+
+ for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
+ seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]);
+}
+
+struct aca_dump_context {
+ struct seq_file *m;
+ int idx;
+};
+
+static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_dump_context *ctx = (struct aca_dump_context *)data;
+
+ aca_dump_entry(ctx->m, bank, type, ctx->idx++);
+
+ return handler_aca_log_bank_error(handle, bank, type, NULL);
+}
+
+static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct aca_dump_context context = {
+ .m = m,
+ .idx = 0,
+ };
+
+ return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context);
+}
+
+static int aca_dump_ce_show(struct seq_file *m, void *unused)
+{
+ return aca_dump_show(m, ACA_SMU_TYPE_CE);
+}
+
+static int aca_dump_ce_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, aca_dump_ce_show, inode->i_private);
+}
+
+static const struct file_operations aca_ce_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = aca_dump_ce_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int aca_dump_ue_show(struct seq_file *m, void *unused)
+{
+ return aca_dump_show(m, ACA_SMU_TYPE_UE);
+}
+
+static int aca_dump_ue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, aca_dump_ue_show, inode->i_private);
+}
+
+static const struct file_operations aca_ue_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = aca_dump_ue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n");
+#endif
+
+void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{
+#if defined(CONFIG_DEBUG_FS)
+ if (!root)
+ return;
+
+ debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);
+ debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops);
+ debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops);
+#endif
+}
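
For orientation, here is a minimal sketch of how an IP block might hook into the framework above; it is editorial, not part of the patch. The my_block_* names are hypothetical and the error-type mapping in the parser is deliberately simplified, while the aca_* helpers, types and masks are the ones defined in amdgpu_aca.c and amdgpu_aca.h.

/* parser callback: decode the bank and feed its error count into the cache */
static int my_block_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
				    enum aca_smu_type type, void *data)
{
	u64 misc0 = bank->regs[ACA_REG_IDX_MISC0];
	struct aca_bank_info info;
	int ret;

	ret = aca_bank_info_decode(bank, &info);
	if (ret)
		return ret;

	/* simplified mapping: SMU UE banks count as UEs, everything else as CEs */
	bank->aca_err_type = (type == ACA_SMU_TYPE_UE) ?
			     ACA_ERROR_TYPE_UE : ACA_ERROR_TYPE_CE;

	return aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
					      ACA_REG__MISC0__ERRCNT(misc0));
}

static const struct aca_bank_ops my_block_aca_bank_ops = {
	.aca_bank_parser = my_block_aca_bank_parser,
};

static const struct aca_info my_block_aca_info = {
	.hwip = ACA_HWIP_TYPE_SMU,
	.mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
	.bank_ops = &my_block_aca_bank_ops,
};

/* typically called from the IP block's RAS init path */
static int my_block_aca_register(struct amdgpu_device *adev, struct aca_handle *handle)
{
	return amdgpu_aca_add_handle(adev, handle, "my_block", &my_block_aca_info, NULL);
}

Once registered, the handle appears in the ras sysfs group as aca_my_block, and its cached counts are drained through amdgpu_aca_get_error_data() during RAS queries.
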
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
new file mode 100644
index 000000000000..38c88897e1ec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ACA_H__
+#define __AMDGPU_ACA_H__
+
+#include <linux/list.h>
+
+struct ras_err_data;
+struct ras_query_context;
+
+#define ACA_MAX_REGS_COUNT (16)
+
+#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
+#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63)
+#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62)
+#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61)
+#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60)
+#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59)
+#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58)
+#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57)
+#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56)
+#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55)
+#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53)
+#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46)
+#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45)
+#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44)
+#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43)
+#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40)
+#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32)
+#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24)
+#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16)
+#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0)
+
+#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48)
+#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44)
+#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32)
+#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0)
+
+#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63)
+#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48)
+#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32)
+
+#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0)
+
+/* NOTE: The following error codes refer to the SMU header file */
+#define ACA_EXTERROR_CODE_CE 0x3a
+#define ACA_EXTERROR_CODE_FAULT 0x3b
+
+#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE)
+#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
+#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
+
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */
+#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
+#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
+#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+
+#define ACA_BANK_ERR_IS_DEFFERED(bank) \
+ (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))
+
+enum aca_reg_idx {
+ ACA_REG_IDX_CTL = 0,
+ ACA_REG_IDX_STATUS = 1,
+ ACA_REG_IDX_ADDR = 2,
+ ACA_REG_IDX_MISC0 = 3,
+ ACA_REG_IDX_CONFIG = 4,
+ ACA_REG_IDX_IPID = 5,
+ ACA_REG_IDX_SYND = 6,
+ ACA_REG_IDX_DESTAT = 8,
+ ACA_REG_IDX_DEADDR = 9,
+ ACA_REG_IDX_CTL_MASK = 10,
+ ACA_REG_IDX_COUNT = 16,
+};
+
+enum aca_hwip_type {
+ ACA_HWIP_TYPE_UNKNOW = -1,
+ ACA_HWIP_TYPE_PSP = 0,
+ ACA_HWIP_TYPE_UMC,
+ ACA_HWIP_TYPE_SMU,
+ ACA_HWIP_TYPE_PCS_XGMI,
+ ACA_HWIP_TYPE_COUNT,
+};
+
+enum aca_error_type {
+ ACA_ERROR_TYPE_INVALID = -1,
+ ACA_ERROR_TYPE_UE = 0,
+ ACA_ERROR_TYPE_CE,
+ ACA_ERROR_TYPE_DEFERRED,
+ ACA_ERROR_TYPE_COUNT
+};
+
+enum aca_smu_type {
+ ACA_SMU_TYPE_INVALID = -1,
+ ACA_SMU_TYPE_UE = 0,
+ ACA_SMU_TYPE_CE,
+ ACA_SMU_TYPE_COUNT,
+};
+
+struct aca_hwip {
+ int hwid;
+ int mcatype;
+};
+
+struct aca_bank {
+ enum aca_error_type aca_err_type;
+ enum aca_smu_type smu_err_type;
+ u64 regs[ACA_MAX_REGS_COUNT];
+};
+
+struct aca_bank_node {
+ struct aca_bank bank;
+ struct list_head node;
+};
+
+struct aca_banks {
+ int nr_banks;
+ struct list_head list;
+};
+
+struct aca_bank_info {
+ int die_id;
+ int socket_id;
+ int hwid;
+ int mcatype;
+};
+
+struct aca_bank_error {
+ struct list_head node;
+ struct aca_bank_info info;
+ u64 count;
+};
+
+struct aca_error {
+ struct list_head list;
+ struct mutex lock;
+ enum aca_error_type type;
+ int nr_errors;
+};
+
+struct aca_handle_manager {
+ struct list_head list;
+ int nr_handles;
+};
+
+struct aca_error_cache {
+ struct aca_error errors[ACA_ERROR_TYPE_COUNT];
+};
+
+struct aca_handle {
+ struct list_head node;
+ enum aca_hwip_type hwip;
+ struct amdgpu_device *adev;
+ struct aca_handle_manager *mgr;
+ struct aca_error_cache error_cache;
+ const struct aca_bank_ops *bank_ops;
+ struct device_attribute aca_attr;
+ char attr_name[64];
+ const char *name;
+ u32 mask;
+ void *data;
+};
+
+struct aca_bank_ops {
+ int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+ bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
+ void *data);
+};
+
+struct aca_smu_funcs {
+ int max_ue_bank_count;
+ int max_ce_bank_count;
+ int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
+ int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
+ int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
+ int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank);
+};
+
+struct amdgpu_aca {
+ struct aca_handle_manager mgr;
+ const struct aca_smu_funcs *smu_funcs;
+ atomic_t ue_update_flag;
+ bool is_enabled;
+};
+
+struct aca_info {
+ enum aca_hwip_type hwip;
+ const struct aca_bank_ops *bank_ops;
+ u32 mask;
+};
+
+int amdgpu_aca_init(struct amdgpu_device *adev);
+void amdgpu_aca_fini(struct amdgpu_device *adev);
+int amdgpu_aca_reset(struct amdgpu_device *adev);
+void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
+bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);
+
+int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info);
+int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size);
+
+int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
+ const char *name, const struct aca_info *aca_info, void *data);
+void amdgpu_aca_remove_handle(struct aca_handle *handle);
+int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx);
+int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
+void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count);
+#endif
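
As a companion to the parser sketch after amdgpu_aca.c, an aca_bank_is_valid callback can narrow which banks a handle accepts by checking the SMU-parsed error code. This too is illustrative: my_block_aca_bank_is_valid and its accepted-code list are hypothetical, while aca_bank_check_error_codes() and the ACA_EXTERROR_CODE_* values are the ones declared in this header.

static bool my_block_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
				       enum aca_smu_type type, void *data)
{
	struct amdgpu_device *adev = handle->adev;
	int err_codes[] = { ACA_EXTERROR_CODE_CE, ACA_EXTERROR_CODE_FAULT };

	/* aca_bank_check_error_codes() returns 0 when the bank's error code
	 * is in the supplied list, so a zero result means "accept this bank".
	 */
	return !aca_bank_check_error_codes(adev, bank, err_codes, ARRAY_SIZE(err_codes));
}
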
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
new file mode 100644
index 000000000000..381ef205b0df
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -0,0 +1,629 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/pci.h>
+#include <linux/pm_domain.h>
+#include <linux/platform_device.h>
+#include <sound/designware_i2s.h>
+#include <sound/pcm.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_acp.h"
+
+#include "acp_gfx_if.h"
+
+#define ST_JADEITE 1
+#define ACP_TILE_ON_MASK 0x03
+#define ACP_TILE_OFF_MASK 0x02
+#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f
+#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20
+
+#define ACP_TILE_P1_MASK 0x3e
+#define ACP_TILE_P2_MASK 0x3d
+#define ACP_TILE_DSP0_MASK 0x3b
+#define ACP_TILE_DSP1_MASK 0x37
+
+#define ACP_TILE_DSP2_MASK 0x2f
+
+#define ACP_DMA_REGS_END 0x146c0
+#define ACP_I2S_PLAY_REGS_START 0x14840
+#define ACP_I2S_PLAY_REGS_END 0x148b4
+#define ACP_I2S_CAP_REGS_START 0x148b8
+#define ACP_I2S_CAP_REGS_END 0x1496c
+
+#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac
+#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8
+#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c
+#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68
+#define ACP_BT_PLAY_REGS_START 0x14970
+#define ACP_BT_PLAY_REGS_END 0x14a24
+#define ACP_BT_COMP1_REG_OFFSET 0xac
+#define ACP_BT_COMP2_REG_OFFSET 0xa8
+
+#define mmACP_PGFSM_RETAIN_REG 0x51c9
+#define mmACP_PGFSM_CONFIG_REG 0x51ca
+#define mmACP_PGFSM_READ_REG_0 0x51cc
+
+#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8
+#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9
+#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa
+#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb
+
+#define mmACP_CONTROL 0x5131
+#define mmACP_STATUS 0x5133
+#define mmACP_SOFT_RESET 0x5134
+#define ACP_CONTROL__ClkEn_MASK 0x1
+#define ACP_SOFT_RESET__SoftResetAud_MASK 0x100
+#define ACP_SOFT_RESET__SoftResetAudDone_MASK 0x1000000
+#define ACP_CLOCK_EN_TIME_OUT_VALUE 0x000000FF
+#define ACP_SOFT_RESET_DONE_TIME_OUT_VALUE 0x000000FF
+
+#define ACP_TIMEOUT_LOOP 0x000000FF
+#define ACP_DEVS 4
+#define ACP_SRC_ID 162
+
+static unsigned long acp_machine_id;
+
+enum {
+ ACP_TILE_P1 = 0,
+ ACP_TILE_P2,
+ ACP_TILE_DSP0,
+ ACP_TILE_DSP1,
+ ACP_TILE_DSP2,
+};
+
+static int acp_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->acp.parent = adev->dev;
+
+ adev->acp.cgs_device =
+ amdgpu_cgs_create_device(adev);
+ if (!adev->acp.cgs_device)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int acp_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->acp.cgs_device)
+ amdgpu_cgs_destroy_device(adev->acp.cgs_device);
+
+ return 0;
+}
+
+struct acp_pm_domain {
+ void *adev;
+ struct generic_pm_domain gpd;
+};
+
+static int acp_poweroff(struct generic_pm_domain *genpd)
+{
+ struct acp_pm_domain *apd;
+ struct amdgpu_device *adev;
+
+ apd = container_of(genpd, struct acp_pm_domain, gpd);
+ adev = apd->adev;
+ /* call smu to POWER GATE ACP block
+ * smu will
+ * 1. turn off the acp clock
+ * 2. power off the acp tiles
+ * 3. check and enter ulv state
+ */
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
+ return 0;
+}
+
+static int acp_poweron(struct generic_pm_domain *genpd)
+{
+ struct acp_pm_domain *apd;
+ struct amdgpu_device *adev;
+
+ apd = container_of(genpd, struct acp_pm_domain, gpd);
+ adev = apd->adev;
+ /* call smu to UNGATE ACP block
+ * smu will
+ * 1. exit ulv
+ * 2. turn on acp clock
+ * 3. power on acp tiles
+ */
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
+ return 0;
+}
+
+static int acp_genpd_add_device(struct device *dev, void *data)
+{
+ struct generic_pm_domain *gpd = data;
+ int ret;
+
+ ret = pm_genpd_add_device(gpd, dev);
+ if (ret)
+ dev_err(dev, "Failed to add dev to genpd %d\n", ret);
+
+ return ret;
+}
+
+static int acp_genpd_remove_device(struct device *dev, void *data)
+{
+ int ret;
+
+ ret = pm_genpd_remove_device(dev);
+ if (ret)
+ dev_err(dev, "Failed to remove dev from genpd %d\n", ret);
+
+ /* Continue to remove */
+ return 0;
+}
+
+static int acp_quirk_cb(const struct dmi_system_id *id)
+{
+ acp_machine_id = ST_JADEITE;
+ return 1;
+}
+
+static const struct dmi_system_id acp_quirk_table[] = {
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"),
+ }
+ },
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"),
+ },
+ },
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"),
+ },
+ },
+ {}
+};
+
+/**
+ * acp_hw_init - start and test ACP block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int acp_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ u64 acp_base;
+ u32 val = 0;
+ u32 count = 0;
+ struct i2s_platform_data *i2s_pdata = NULL;
+
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = amd_acp_hw_init(adev->acp.cgs_device,
+ ip_block->version->major, ip_block->version->minor);
+ /* -ENODEV means board uses AZ rather than ACP */
+ if (r == -ENODEV) {
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
+ return 0;
+ } else if (r) {
+ return r;
+ }
+
+ if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+ return -EINVAL;
+
+ acp_base = adev->rmmio_base;
+ adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
+ if (!adev->acp.acp_genpd)
+ return -ENOMEM;
+
+ adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
+ adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
+ adev->acp.acp_genpd->gpd.power_on = acp_poweron;
+ adev->acp.acp_genpd->adev = adev;
+
+ pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
+ dmi_check_system(acp_quirk_table);
+ switch (acp_machine_id) {
+ case ST_JADEITE:
+ {
+ adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell),
+ GFP_KERNEL);
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dma_irq";
+ adev->acp.acp_res[2].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
+ adev->acp.acp_cell[0].num_resources = 3;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, 2, NULL, 0, NULL);
+ if (r)
+ goto failure;
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
+ break;
+ }
+ default:
+ adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
+ GFP_KERNEL);
+
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ }
+ i2s_pdata[0].cap = DWC_I2S_PLAY;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1 |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1;
+ }
+
+ i2s_pdata[1].cap = DWC_I2S_RECORD;
+ i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ break;
+ }
+
+ i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
+ i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
+ adev->acp.acp_res[2].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
+ adev->acp.acp_res[3].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
+ adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
+
+ adev->acp.acp_res[4].name = "acp2x_dma_irq";
+ adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].id = 0;
+ adev->acp.acp_cell[0].num_resources = 5;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].id = 1;
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[2].name = "designware-i2s";
+ adev->acp.acp_cell[2].id = 2;
+ adev->acp.acp_cell[2].num_resources = 1;
+ adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
+ adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
+ adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[3].name = "designware-i2s";
+ adev->acp.acp_cell[3].id = 3;
+ adev->acp.acp_cell[3].num_resources = 1;
+ adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
+ adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
+ adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
+
+ r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, ACP_DEVS, NULL, 0, NULL);
+ if (r)
+ goto failure;
+
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
+ }
+
+ /* Assert Soft reset of ACP */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+
+ val |= ACP_SOFT_RESET__SoftResetAud_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+
+ count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE;
+ while (true) {
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+ if (ACP_SOFT_RESET__SoftResetAudDone_MASK ==
+ (val & ACP_SOFT_RESET__SoftResetAudDone_MASK))
+ break;
+ if (--count == 0) {
+ dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+ r = -ETIMEDOUT;
+ goto failure;
+ }
+ udelay(100);
+ }
+ /* Enable clock to ACP and wait until the clock is enabled */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL);
+ val = val | ACP_CONTROL__ClkEn_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val);
+
+ count = ACP_CLOCK_EN_TIME_OUT_VALUE;
+
+ while (true) {
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS);
+ if (val & (u32) 0x1)
+ break;
+ if (--count == 0) {
+ dev_err(&adev->pdev->dev, "Failed to enable ACP clock\n");
+ r = -ETIMEDOUT;
+ goto failure;
+ }
+ udelay(100);
+ }
+ /* Deassert the SOFT RESET flags */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+ val &= ~ACP_SOFT_RESET__SoftResetAud_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+ return 0;
+
+failure:
+ kfree(i2s_pdata);
+ kfree(adev->acp.acp_res);
+ kfree(adev->acp.acp_cell);
+ kfree(adev->acp.acp_genpd);
+ return r;
+}
+
+/**
+ * acp_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int acp_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ u32 val = 0;
+ u32 count = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* return early if no ACP */
+ if (!adev->acp.acp_genpd) {
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
+ return 0;
+ }
+
+ /* Assert Soft reset of ACP */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+
+ val |= ACP_SOFT_RESET__SoftResetAud_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
+
+ count = ACP_SOFT_RESET_DONE_TIME_OUT_VALUE;
+ while (true) {
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
+ if (ACP_SOFT_RESET__SoftResetAudDone_MASK ==
+ (val & ACP_SOFT_RESET__SoftResetAudDone_MASK))
+ break;
+ if (--count == 0) {
+ dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+ return -ETIMEDOUT;
+ }
+ udelay(100);
+ }
+ /* Disable ACP clock */
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_CONTROL);
+ val &= ~ACP_CONTROL__ClkEn_MASK;
+ cgs_write_register(adev->acp.cgs_device, mmACP_CONTROL, val);
+
+ count = ACP_CLOCK_EN_TIME_OUT_VALUE;
+
+ while (true) {
+ val = cgs_read_register(adev->acp.cgs_device, mmACP_STATUS);
+ if (val & (u32) 0x1)
+ break;
+ if (--count == 0) {
+ dev_err(&adev->pdev->dev, "Failed to reset ACP\n");
+ return -ETIMEDOUT;
+ }
+ udelay(100);
+ }
+
+ device_for_each_child(adev->acp.parent, NULL,
+ acp_genpd_remove_device);
+
+ mfd_remove_devices(adev->acp.parent);
+ kfree(adev->acp.acp_res);
+ kfree(adev->acp.acp_genpd);
+ kfree(adev->acp.acp_cell);
+
+ return 0;
+}
+
+static int acp_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* power up on suspend */
+ if (!adev->acp.acp_cell)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
+ return 0;
+}
+
+static int acp_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* power down again on resume */
+ if (!adev->acp.acp_cell)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
+ return 0;
+}
+
+static bool acp_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0);
+
+ return 0;
+}
+
+static const struct amd_ip_funcs acp_ip_funcs = {
+ .name = "acp_ip",
+ .sw_init = acp_sw_init,
+ .sw_fini = acp_sw_fini,
+ .hw_init = acp_hw_init,
+ .hw_fini = acp_hw_fini,
+ .suspend = acp_suspend,
+ .resume = acp_resume,
+ .is_idle = acp_is_idle,
+ .set_clockgating_state = acp_set_clockgating_state,
+ .set_powergating_state = acp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version acp_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_ACP,
+ .major = 2,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &acp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
new file mode 100644
index 000000000000..a288ce25c176
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_ACP_H__
+#define __AMDGPU_ACP_H__
+
+#include <linux/mfd/core.h>
+
+struct amdgpu_acp {
+ struct device *parent;
+ struct cgs_device *cgs_device;
+ struct amd_acp_private *private;
+ struct mfd_cell *acp_cell;
+ struct resource *acp_res;
+ struct acp_pm_domain *acp_genpd;
+};
+
+extern const struct amdgpu_ip_block_version acp_ip_block;
+
+#endif /* __AMDGPU_ACP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
new file mode 100644
index 000000000000..d31460a9e958
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -0,0 +1,1565 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/backlight.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+#include <linux/power_supply.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <acpi/video.h>
+#include <acpi/actbl.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_display.h"
+#include "amd_acpi.h"
+#include "atom.h"
+
+/* Declare GUID for AMD _DSM method for XCCs */
+static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2,
+ 0xb8, 0xb4, 0x45, 0x56, 0x2e,
+ 0x8c, 0x5b, 0xec);
+
+#define AMD_XCC_HID_START 3000
+#define AMD_XCC_DSM_GET_NUM_FUNCS 0
+#define AMD_XCC_DSM_GET_SUPP_MODE 1
+#define AMD_XCC_DSM_GET_XCP_MODE 2
+#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4
+#define AMD_XCC_DSM_GET_TMR_INFO 5
+#define AMD_XCC_DSM_NUM_FUNCS 5
+
+#define AMD_XCC_MAX_HID 24
+
+struct xarray numa_info_xa;
+
+/* Encapsulates the XCD acpi object information */
+struct amdgpu_acpi_xcc_info {
+ struct list_head list;
+ struct amdgpu_numa_info *numa_info;
+ uint8_t xcp_node;
+ uint8_t phy_id;
+ acpi_handle handle;
+};
+
+struct amdgpu_acpi_dev_info {
+ struct list_head list;
+ struct list_head xcc_list;
+ uint32_t sbdf;
+ uint16_t supp_xcp_mode;
+ uint16_t xcp_mode;
+ uint16_t mem_mode;
+ uint64_t tmr_base;
+ uint64_t tmr_size;
+};
+
+struct list_head amdgpu_acpi_dev_list;
+
+struct amdgpu_atif_notification_cfg {
+ bool enabled;
+ int command_code;
+};
+
+struct amdgpu_atif_notifications {
+ bool thermal_state;
+ bool forced_power_state;
+ bool system_power_state;
+ bool brightness_change;
+ bool dgpu_display_event;
+ bool gpu_package_power_limit;
+};
+
+struct amdgpu_atif_functions {
+ bool system_params;
+ bool sbios_requests;
+ bool temperature_change;
+ bool query_backlight_transfer_characteristics;
+ bool ready_to_undock;
+ bool external_gpu_information;
+};
+
+struct amdgpu_atif {
+ acpi_handle handle;
+
+ struct amdgpu_atif_notifications notifications;
+ struct amdgpu_atif_functions functions;
+ struct amdgpu_atif_notification_cfg notification_cfg;
+ struct backlight_device *bd;
+ struct amdgpu_dm_backlight_caps backlight_caps;
+};
+
+struct amdgpu_atcs_functions {
+ bool get_ext_state;
+ bool pcie_perf_req;
+ bool pcie_dev_rdy;
+ bool pcie_bus_width;
+ bool power_shift_control;
+};
+
+struct amdgpu_atcs {
+ acpi_handle handle;
+
+ struct amdgpu_atcs_functions functions;
+};
+
+static struct amdgpu_acpi_priv {
+ struct amdgpu_atif atif;
+ struct amdgpu_atcs atcs;
+} amdgpu_acpi_priv;
+
+/* Call the ATIF method
+ */
+/**
+ * amdgpu_atif_call - call an ATIF method
+ *
+ * @atif: atif structure
+ * @function: the ATIF function to execute
+ * @params: ATIF function params
+ *
+ * Executes the requested ATIF function (all asics).
+ * Returns a pointer to the acpi output buffer.
+ */
+static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
+ int function,
+ struct acpi_buffer *params)
+{
+ acpi_status status;
+ union acpi_object *obj;
+ union acpi_object atif_arg_elements[2];
+ struct acpi_object_list atif_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+ atif_arg.count = 2;
+ atif_arg.pointer = &atif_arg_elements[0];
+
+ atif_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ atif_arg_elements[0].integer.value = function;
+
+ if (params) {
+ atif_arg_elements[1].type = ACPI_TYPE_BUFFER;
+ atif_arg_elements[1].buffer.length = params->length;
+ atif_arg_elements[1].buffer.pointer = params->pointer;
+ } else {
+ /* We need a second fake parameter */
+ atif_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ atif_arg_elements[1].integer.value = 0;
+ }
+
+ status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
+ &buffer);
+ obj = (union acpi_object *)buffer.pointer;
+
+ /* Fail if calling the method fails */
+ if (ACPI_FAILURE(status)) {
+ DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n",
+ acpi_format_exception(status));
+ kfree(obj);
+ return NULL;
+ }
+
+ if (obj->type != ACPI_TYPE_BUFFER) {
+ DRM_DEBUG_DRIVER("bad object returned from ATIF: %d\n",
+ obj->type);
+ kfree(obj);
+ return NULL;
+ }
+
+ return obj;
+}
+
+/**
+ * amdgpu_atif_parse_notification - parse supported notifications
+ *
+ * @n: supported notifications struct
+ * @mask: supported notifications mask from ATIF
+ *
+ * Use the supported notifications mask from ATIF function
+ * ATIF_FUNCTION_VERIFY_INTERFACE to determine what notifications
+ * are supported (all asics).
+ */
+static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask)
+{
+ n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED;
+ n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED;
+ n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED;
+ n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED;
+ n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED;
+ n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED;
+}
+
+/**
+ * amdgpu_atif_parse_functions - parse supported functions
+ *
+ * @f: supported functions struct
+ * @mask: supported functions mask from ATIF
+ *
+ * Use the supported functions mask from ATIF function
+ * ATIF_FUNCTION_VERIFY_INTERFACE to determine what functions
+ * are supported (all asics).
+ */
+static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mask)
+{
+ f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED;
+ f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED;
+ f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED;
+ f->query_backlight_transfer_characteristics =
+ mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED;
+ f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED;
+ f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED;
+}
+
+/**
+ * amdgpu_atif_verify_interface - verify ATIF
+ *
+ * @atif: amdgpu atif struct
+ *
+ * Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function
+ * to initialize ATIF and determine what features are supported
+ * (all asics).
+ * returns 0 on success, error on failure.
+ */
+static int amdgpu_atif_verify_interface(struct amdgpu_atif *atif)
+{
+ union acpi_object *info;
+ struct atif_verify_interface output;
+ size_t size;
+ int err = 0;
+
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_VERIFY_INTERFACE, NULL);
+ if (!info)
+ return -EIO;
+
+ memset(&output, 0, sizeof(output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 12) {
+ DRM_INFO("ATIF buffer is too small: %zu\n", size);
+ err = -EINVAL;
+ goto out;
+ }
+ size = min(sizeof(output), size);
+
+ memcpy(&output, info->buffer.pointer, size);
+
+ /* TODO: check version? */
+ DRM_DEBUG_DRIVER("ATIF version %u\n", output.version);
+
+ amdgpu_atif_parse_notification(&atif->notifications, output.notification_mask);
+ amdgpu_atif_parse_functions(&atif->functions, output.function_bits);
+
+out:
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_atif_get_notification_params - determine notify configuration
+ *
+ * @atif: amdgpu atif struct
+ *
+ * Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function
+ * to determine if a notifier is used and if so which one
+ * (all asics). This is either Notify(VGA, 0x81) or Notify(VGA, n)
+ * where n is specified in the result if a notifier is used.
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif)
+{
+ union acpi_object *info;
+ struct amdgpu_atif_notification_cfg *n = &atif->notification_cfg;
+ struct atif_system_params params;
+ size_t size;
+ int err = 0;
+
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS,
+ NULL);
+ if (!info) {
+ err = -EIO;
+ goto out;
+ }
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 10) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memset(&params, 0, sizeof(params));
+ size = min(sizeof(params), size);
+ memcpy(&params, info->buffer.pointer, size);
+
+ DRM_DEBUG_DRIVER("SYSTEM_PARAMS: mask = %#x, flags = %#x\n",
+ params.flags, params.valid_mask);
+ params.flags = params.flags & params.valid_mask;
+
+ if ((params.flags & ATIF_NOTIFY_MASK) == ATIF_NOTIFY_NONE) {
+ n->enabled = false;
+ n->command_code = 0;
+ } else if ((params.flags & ATIF_NOTIFY_MASK) == ATIF_NOTIFY_81) {
+ n->enabled = true;
+ n->command_code = 0x81;
+ } else {
+ if (size < 11) {
+ err = -EINVAL;
+ goto out;
+ }
+ n->enabled = true;
+ n->command_code = params.command_code;
+ }
+
+out:
+ DRM_DEBUG_DRIVER("Notification %s, command code = %#x\n",
+ (n->enabled ? "enabled" : "disabled"),
+ n->command_code);
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_atif_query_backlight_caps - get min and max backlight input signal
+ *
+ * @atif: amdgpu atif struct
+ *
+ * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
+ * to determine the acceptable range of backlight values
+ *
+ * Backlight_caps.caps_valid will be set to true if the query is successful
+ *
+ * The input signals are in range 0-255
+ *
+ * This function assumes the display with backlight is the first LCD
+ *
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
+{
+ union acpi_object *info;
+ struct atif_qbtc_output characteristics;
+ struct atif_qbtc_arguments arguments;
+ struct acpi_buffer params;
+ size_t size;
+ int err = 0;
+
+ arguments.size = sizeof(arguments);
+ arguments.requested_display = ATIF_QBTC_REQUEST_LCD1;
+
+ params.length = sizeof(arguments);
+ params.pointer = (void *)&arguments;
+
+ info = amdgpu_atif_call(atif,
+ ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS,
+ &params);
+ if (!info) {
+ err = -EIO;
+ goto out;
+ }
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 10) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memset(&characteristics, 0, sizeof(characteristics));
+ size = min(sizeof(characteristics), size);
+ memcpy(&characteristics, info->buffer.pointer, size);
+
+ atif->backlight_caps.caps_valid = true;
+ atif->backlight_caps.min_input_signal =
+ characteristics.min_input_signal;
+ atif->backlight_caps.max_input_signal =
+ characteristics.max_input_signal;
+ atif->backlight_caps.ac_level = characteristics.ac_level;
+ atif->backlight_caps.dc_level = characteristics.dc_level;
+ atif->backlight_caps.data_points = characteristics.number_of_points;
+ memcpy(atif->backlight_caps.luminance_data,
+ characteristics.data_points,
+ sizeof(atif->backlight_caps.luminance_data));
+out:
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_atif_get_sbios_requests - get requested sbios event
+ *
+ * @atif: amdgpu atif struct
+ * @req: atif sbios request struct
+ *
+ * Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function
+ * to determine what requests the sbios is making to the driver
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atif_get_sbios_requests(struct amdgpu_atif *atif,
+ struct atif_sbios_requests *req)
+{
+ union acpi_object *info;
+ size_t size;
+ int count = 0;
+
+ info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS,
+ NULL);
+ if (!info)
+ return -EIO;
+
+ size = *(u16 *)info->buffer.pointer;
+ if (size < 0xd) {
+ count = -EINVAL;
+ goto out;
+ }
+ memset(req, 0, sizeof(*req));
+
+ size = min(sizeof(*req), size);
+ memcpy(req, info->buffer.pointer, size);
+ DRM_DEBUG_DRIVER("SBIOS pending requests: %#x\n", req->pending);
+
+ count = hweight32(req->pending);
+
+out:
+ kfree(info);
+ return count;
+}
+
+/**
+ * amdgpu_atif_handler - handle ATIF notify requests
+ *
+ * @adev: amdgpu_device pointer
+ * @event: acpi bus event
+ *
+ * Checks the acpi event and if it matches an atif event,
+ * handles it.
+ *
+ * Returns:
+ * NOTIFY_BAD or NOTIFY_DONE, depending on the event.
+ */
+static int amdgpu_atif_handler(struct amdgpu_device *adev,
+ struct acpi_bus_event *event)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+ int count;
+
+ DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n",
+ event->device_class, event->type);
+
+ if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0)
+ return NOTIFY_DONE;
+
+ /* Is this actually our event? */
+ if (!atif->notification_cfg.enabled ||
+ event->type != atif->notification_cfg.command_code) {
+ /* These events will generate keypresses otherwise */
+ if (event->type == ACPI_VIDEO_NOTIFY_PROBE)
+ return NOTIFY_BAD;
+ else
+ return NOTIFY_DONE;
+ }
+
+ if (atif->functions.sbios_requests) {
+ struct atif_sbios_requests req;
+
+ /* Check pending SBIOS requests */
+ count = amdgpu_atif_get_sbios_requests(atif, &req);
+
+ if (count <= 0)
+ return NOTIFY_BAD;
+
+ DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
+
+ if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
+ if (atif->bd) {
+ DRM_DEBUG_DRIVER("Changing brightness to %d\n",
+ req.backlight_level);
+ /*
+ * XXX backlight_device_set_brightness() is
+ * hardwired to post BACKLIGHT_UPDATE_SYSFS.
+ * It probably should accept 'reason' parameter.
+ */
+ backlight_device_set_brightness(atif->bd, req.backlight_level);
+ }
+ }
+
+ if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
+ if (adev->flags & AMD_IS_PX) {
+ pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ /* Just fire off a uevent and let userspace tell us what to do */
+ drm_helper_hpd_irq_event(adev_to_drm(adev));
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ }
+ }
+ /* TODO: check other events */
+ }
+
+ /* We've handled the event, stop the notifier chain. The ACPI interface
+ * overloads ACPI_VIDEO_NOTIFY_PROBE, we don't want to send that to
+ * userspace if the event was generated only to signal a SBIOS
+ * request.
+ */
+ return NOTIFY_BAD;
+}
+
+/* Call the ATCS method
+ */
+/**
+ * amdgpu_atcs_call - call an ATCS method
+ *
+ * @atcs: atcs structure
+ * @function: the ATCS function to execute
+ * @params: ATCS function params
+ *
+ * Executes the requested ATCS function (all asics).
+ * Returns a pointer to the acpi output buffer.
+ */
+static union acpi_object *amdgpu_atcs_call(struct amdgpu_atcs *atcs,
+ int function,
+ struct acpi_buffer *params)
+{
+ acpi_status status;
+ union acpi_object atcs_arg_elements[2];
+ struct acpi_object_list atcs_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+ atcs_arg.count = 2;
+ atcs_arg.pointer = &atcs_arg_elements[0];
+
+ atcs_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ atcs_arg_elements[0].integer.value = function;
+
+ if (params) {
+ atcs_arg_elements[1].type = ACPI_TYPE_BUFFER;
+ atcs_arg_elements[1].buffer.length = params->length;
+ atcs_arg_elements[1].buffer.pointer = params->pointer;
+ } else {
+ /* We need a second fake parameter */
+ atcs_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ atcs_arg_elements[1].integer.value = 0;
+ }
+
+ status = acpi_evaluate_object(atcs->handle, NULL, &atcs_arg, &buffer);
+
+ /* Fail only if calling the method fails and ATCS is supported */
+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ DRM_DEBUG_DRIVER("failed to evaluate ATCS got %s\n",
+ acpi_format_exception(status));
+ kfree(buffer.pointer);
+ return NULL;
+ }
+
+ return buffer.pointer;
+}
+
+/**
+ * amdgpu_atcs_parse_functions - parse supported functions
+ *
+ * @f: supported functions struct
+ * @mask: supported functions mask from ATCS
+ *
+ * Use the supported functions mask from ATCS function
+ * ATCS_FUNCTION_VERIFY_INTERFACE to determine what functions
+ * are supported (all asics).
+ */
+static void amdgpu_atcs_parse_functions(struct amdgpu_atcs_functions *f, u32 mask)
+{
+ f->get_ext_state = mask & ATCS_GET_EXTERNAL_STATE_SUPPORTED;
+ f->pcie_perf_req = mask & ATCS_PCIE_PERFORMANCE_REQUEST_SUPPORTED;
+ f->pcie_dev_rdy = mask & ATCS_PCIE_DEVICE_READY_NOTIFICATION_SUPPORTED;
+ f->pcie_bus_width = mask & ATCS_SET_PCIE_BUS_WIDTH_SUPPORTED;
+ f->power_shift_control = mask & ATCS_SET_POWER_SHIFT_CONTROL_SUPPORTED;
+}
+
+/**
+ * amdgpu_atcs_verify_interface - verify ATCS
+ *
+ * @atcs: amdgpu atcs struct
+ *
+ * Execute the ATCS_FUNCTION_VERIFY_INTERFACE ATCS function
+ * to initialize ATCS and determine what features are supported
+ * (all asics).
+ * returns 0 on success, error on failure.
+ */
+static int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs)
+{
+ union acpi_object *info;
+ struct atcs_verify_interface output;
+ size_t size;
+ int err = 0;
+
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_VERIFY_INTERFACE, NULL);
+ if (!info)
+ return -EIO;
+
+ memset(&output, 0, sizeof(output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 8) {
+ DRM_INFO("ATCS buffer is too small: %zu\n", size);
+ err = -EINVAL;
+ goto out;
+ }
+ size = min(sizeof(output), size);
+
+ memcpy(&output, info->buffer.pointer, size);
+
+ /* TODO: check version? */
+ DRM_DEBUG_DRIVER("ATCS version %u\n", output.version);
+
+ amdgpu_atcs_parse_functions(&atcs->functions, output.function_bits);
+
+out:
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_acpi_is_pcie_performance_request_supported
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Check if the ATCS pcie_perf_req and pcie_dev_rdy methods
+ * are supported (all asics).
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *adev)
+{
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+
+ if (atcs->functions.pcie_perf_req && atcs->functions.pcie_dev_rdy)
+ return true;
+
+ return false;
+}
+
+/**
+ * amdgpu_acpi_is_power_shift_control_supported
+ *
+ * Check if the ATCS power shift control method
+ * is supported.
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_power_shift_control_supported(void)
+{
+ return amdgpu_acpi_priv.atcs.functions.power_shift_control;
+}
+
+/**
+ * amdgpu_acpi_pcie_notify_device_ready
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Executes the PCIE_DEVICE_READY_NOTIFICATION method
+ * (all asics).
+ * returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev)
+{
+ union acpi_object *info;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+
+ if (!atcs->functions.pcie_dev_rdy)
+ return -EINVAL;
+
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, NULL);
+ if (!info)
+ return -EIO;
+
+ kfree(info);
+
+ return 0;
+}
+
+/**
+ * amdgpu_acpi_pcie_performance_request
+ *
+ * @adev: amdgpu_device pointer
+ * @perf_req: requested perf level (pcie gen speed)
+ * @advertise: set advertise caps flag if set
+ *
+ * Executes the PCIE_PERFORMANCE_REQUEST method to
+ * change the pcie gen speed (all asics).
+ * returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
+ u8 perf_req, bool advertise)
+{
+ union acpi_object *info;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct atcs_pref_req_input atcs_input;
+ struct atcs_pref_req_output atcs_output;
+ struct acpi_buffer params;
+ size_t size;
+ u32 retry = 3;
+
+ if (amdgpu_acpi_pcie_notify_device_ready(adev))
+ return -EINVAL;
+
+ if (!atcs->functions.pcie_perf_req)
+ return -EINVAL;
+
+ atcs_input.size = sizeof(struct atcs_pref_req_input);
+ /* client id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
+ atcs_input.client_id = pci_dev_id(adev->pdev);
+ atcs_input.valid_flags_mask = ATCS_VALID_FLAGS_MASK;
+ atcs_input.flags = ATCS_WAIT_FOR_COMPLETION;
+ if (advertise)
+ atcs_input.flags |= ATCS_ADVERTISE_CAPS;
+ atcs_input.req_type = ATCS_PCIE_LINK_SPEED;
+ atcs_input.perf_req = perf_req;
+
+ params.length = sizeof(struct atcs_pref_req_input);
+ params.pointer = &atcs_input;
+
+ while (retry--) {
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, &params);
+ if (!info)
+ return -EIO;
+
+ memset(&atcs_output, 0, sizeof(atcs_output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 3) {
+ DRM_INFO("ATCS buffer is too small: %zu\n", size);
+ kfree(info);
+ return -EINVAL;
+ }
+ size = min(sizeof(atcs_output), size);
+
+ memcpy(&atcs_output, info->buffer.pointer, size);
+
+ kfree(info);
+
+ switch (atcs_output.ret_val) {
+ case ATCS_REQUEST_REFUSED:
+ default:
+ return -EINVAL;
+ case ATCS_REQUEST_COMPLETE:
+ return 0;
+ case ATCS_REQUEST_IN_PROGRESS:
+ udelay(10);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_acpi_power_shift_control
+ *
+ * @adev: amdgpu_device pointer
+ * @dev_state: device acpi state
+ * @drv_state: driver state
+ *
+ * Executes the POWER_SHIFT_CONTROL method to
+ * communicate current dGPU device state and
+ * driver state to APU/SBIOS.
+ * returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
+ u8 dev_state, bool drv_state)
+{
+ union acpi_object *info;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct atcs_pwr_shift_input atcs_input;
+ struct acpi_buffer params;
+
+ if (!amdgpu_acpi_is_power_shift_control_supported())
+ return -EINVAL;
+
+ atcs_input.size = sizeof(struct atcs_pwr_shift_input);
+ /* dGPU id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */
+ atcs_input.dgpu_id = pci_dev_id(adev->pdev);
+ atcs_input.dev_acpi_state = dev_state;
+ atcs_input.drv_state = drv_state;
+
+ params.length = sizeof(struct atcs_pwr_shift_input);
+ params.pointer = &atcs_input;
+
+ info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, &params);
+ if (!info) {
+ DRM_ERROR("ATCS PSC update failed\n");
+ return -EIO;
+ }
+
+ kfree(info);
+ return 0;
+}
+
+/**
+ * amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS
+ *
+ * @adev: amdgpu device pointer
+ * @ss_state: current smart shift event
+ *
+ * returns 0 on success,
+ * otherwise return error number.
+ */
+int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev,
+ enum amdgpu_ss ss_state)
+{
+ int r;
+
+ if (!amdgpu_device_supports_smart_shift(adev))
+ return 0;
+
+ switch (ss_state) {
+ /* SBIOS trigger “stop”, “enable” and “start” at D0, Driver Operational.
+ * SBIOS trigger “stop” at D3, Driver Not Operational.
+ * SBIOS trigger “stop” and “disable” at D0, Driver NOT operational.
+ */
+ case AMDGPU_SS_DRV_LOAD:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_OPR);
+ break;
+ case AMDGPU_SS_DEV_D0:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_OPR);
+ break;
+ case AMDGPU_SS_DEV_D3:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT,
+ AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR);
+ break;
+ case AMDGPU_SS_DRV_UNLOAD:
+ r = amdgpu_acpi_power_shift_control(adev,
+ AMDGPU_ATCS_PSC_DEV_STATE_D0,
+ AMDGPU_ATCS_PSC_DRV_STATE_NOT_OPR);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+#ifdef CONFIG_ACPI_NUMA
+static inline uint64_t amdgpu_acpi_get_numa_size(int nid)
+{
+ /* This is directly using si_meminfo_node implementation as the
+ * function is not exported.
+ */
+ int zone_type;
+ uint64_t managed_pages = 0;
+
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ managed_pages +=
+ zone_managed_pages(&pgdat->node_zones[zone_type]);
+ return managed_pages * PAGE_SIZE;
+}
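As a quick sanity check on the arithmetic (not from the patch): a node whose zones together manage 1,048,576 pages reports 1,048,576 * 4096 = 4 GiB with the common 4 KiB PAGE_SIZE.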
+
+static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm)
+{
+ struct amdgpu_numa_info *numa_info;
+ int nid;
+
+ numa_info = xa_load(&numa_info_xa, pxm);
+
+ if (!numa_info) {
+ struct sysinfo info;
+
+ numa_info = kzalloc(sizeof(*numa_info), GFP_KERNEL);
+ if (!numa_info)
+ return NULL;
+
+ nid = pxm_to_node(pxm);
+ numa_info->pxm = pxm;
+ numa_info->nid = nid;
+
+ if (numa_info->nid == NUMA_NO_NODE) {
+ si_meminfo(&info);
+ numa_info->size = info.totalram * info.mem_unit;
+ } else {
+ numa_info->size = amdgpu_acpi_get_numa_size(nid);
+ }
+ xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL);
+ }
+
+ return numa_info;
+}
+#endif
+
+/**
+ * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu
+ * acpi device handle
+ *
+ * @handle: acpi handle
+ * @numa_info: amdgpu_numa_info structure holding numa information
+ *
+ * Queries the ACPI interface to fetch the corresponding NUMA Node ID for a
+ * given amdgpu acpi device.
+ *
+ * Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason
+ */
+static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle,
+ struct amdgpu_numa_info **numa_info)
+{
+#ifdef CONFIG_ACPI_NUMA
+ u64 pxm;
+ acpi_status status;
+
+ if (!numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
+
+ if (ACPI_FAILURE(status))
+ return status;
+
+ *numa_info = amdgpu_acpi_get_numa_info(pxm);
+
+ if (!*numa_info)
+ return_ACPI_STATUS(AE_ERROR);
+
+ return_ACPI_STATUS(AE_OK);
+#else
+ return_ACPI_STATUS(AE_NOT_EXIST);
+#endif
+}
+
+static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u32 sbdf)
+{
+ struct amdgpu_acpi_dev_info *acpi_dev;
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return NULL;
+
+ list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list)
+ if (acpi_dev->sbdf == sbdf)
+ return acpi_dev;
+
+ return NULL;
+}
+
+static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info,
+ struct amdgpu_acpi_xcc_info *xcc_info, u32 sbdf)
+{
+ struct amdgpu_acpi_dev_info *tmp;
+ union acpi_object *obj;
+ int ret = -ENOENT;
+
+ *dev_info = NULL;
+ tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tmp->xcc_list);
+ INIT_LIST_HEAD(&tmp->list);
+ tmp->sbdf = sbdf;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_SUPP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_SUPP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->supp_xcp_mode = obj->integer.value & 0xFFFF;
+ ACPI_FREE(obj);
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_XCP_MODE, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_XCP_MODE);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->xcp_mode = obj->integer.value & 0xFFFF;
+ tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_TMR_INFO, NULL,
+ ACPI_TYPE_PACKAGE);
+
+ if (!obj || obj->package.count < 2) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_TMR_INFO);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ tmp->tmr_base = obj->package.elements[0].integer.value;
+ tmp->tmr_size = obj->package.elements[1].integer.value;
+ ACPI_FREE(obj);
+
+ DRM_DEBUG_DRIVER(
+ "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ",
+ tmp->sbdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode,
+ tmp->tmr_base, tmp->tmr_size);
+ list_add_tail(&tmp->list, &amdgpu_acpi_dev_list);
+ *dev_info = tmp;
+
+ return 0;
+
+out:
+ if (obj)
+ ACPI_FREE(obj);
+ kfree(tmp);
+
+ return ret;
+}
+
+static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info,
+ u32 *sbdf)
+{
+ union acpi_object *obj;
+ acpi_status status;
+ int ret = -ENOENT;
+
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_NUM_FUNCS, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS)
+ goto out;
+ ACPI_FREE(obj);
+
+ /* Evaluate DSMs and fill XCC information */
+ obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0,
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL,
+ ACPI_TYPE_INTEGER);
+
+ if (!obj) {
+ acpi_handle_debug(xcc_info->handle,
+ "_DSM function %d evaluation failed",
+ AMD_XCC_DSM_GET_VF_XCC_MAPPING);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* PF xcc id [39:32] */
+ xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF;
+ /* xcp node of this xcc [47:40] */
+ xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF;
+ /* PF domain of this xcc [31:16] */
+ *sbdf = (obj->integer.value) & 0xFFFF0000;
+ /* PF bus/dev/fn of this xcc [63:48] */
+ *sbdf |= (obj->integer.value >> 48) & 0xFFFF;
+ ACPI_FREE(obj);
+ obj = NULL;
+
+ status =
+ amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info);
+
+ /* TODO: check if this check is required */
+ if (ACPI_SUCCESS(status))
+ ret = 0;
+out:
+ if (obj)
+ ACPI_FREE(obj);
+
+ return ret;
+}
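The unpacking above is easiest to follow with a concrete value; the one below is hypothetical, and the decode simply applies the same shifts and masks, landing on the sbdf format (domain in bits 31:16, bus/dev/fn in bits 15:0) that amdgpu_acpi_get_tmr_info() builds from pci_domain_nr() and pci_dev_id() later in this file.

    /* Hypothetical _DSM(GET_VF_XCC_MAPPING) result: 0xC500010200010000
     *   bits 63:48 = 0xC500 -> PF bus/dev/fn (bus 0xc5, devfn 0x00)
     *   bits 47:40 = 0x01   -> xcc_info->xcp_node
     *   bits 39:32 = 0x02   -> xcc_info->phy_id
     *   bits 31:16 = 0x0001 -> PCI domain
     * so *sbdf ends up as (0x0001 << 16) | 0xC500 = 0x0001C500.
     */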
+
+static int amdgpu_acpi_enumerate_xcc(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info = NULL;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ struct acpi_device *acpi_dev;
+ char hid[ACPI_ID_LEN];
+ int ret, id;
+ u32 sbdf;
+
+ INIT_LIST_HEAD(&amdgpu_acpi_dev_list);
+ xa_init(&numa_info_xa);
+
+ for (id = 0; id < AMD_XCC_MAX_HID; id++) {
+ sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id);
+ acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1);
+ /* These ACPI objects are expected to be in sequential order. If
+ * one is not found, no need to check the rest.
+ */
+ if (!acpi_dev) {
+ DRM_DEBUG_DRIVER("No matching acpi device found for %s",
+ hid);
+ break;
+ }
+
+ xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info),
+ GFP_KERNEL);
+ if (!xcc_info) {
+ DRM_ERROR("Failed to allocate memory for xcc info\n");
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&xcc_info->list);
+ xcc_info->handle = acpi_device_handle(acpi_dev);
+ acpi_dev_put(acpi_dev);
+
+ ret = amdgpu_acpi_get_xcc_info(xcc_info, &sbdf);
+ if (ret) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+
+ if (!dev_info)
+ ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, sbdf);
+
+ if (ret == -ENOMEM)
+ return ret;
+
+ if (!dev_info) {
+ kfree(xcc_info);
+ continue;
+ }
+
+ list_add_tail(&xcc_info->list, &dev_info->xcc_list);
+ }
+
+ return 0;
+}
+
+int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset,
+ u64 *tmr_size)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ u32 sbdf;
+
+ if (!tmr_offset || !tmr_size)
+ return -EINVAL;
+
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ *tmr_offset = dev_info->tmr_base;
+ *tmr_size = dev_info->tmr_size;
+
+ return 0;
+}
+
+int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id,
+ struct amdgpu_numa_info *numa_info)
+{
+ struct amdgpu_acpi_dev_info *dev_info;
+ struct amdgpu_acpi_xcc_info *xcc_info;
+ u32 sbdf;
+
+ if (!numa_info)
+ return -EINVAL;
+
+ sbdf = (pci_domain_nr(adev->pdev->bus) << 16);
+ sbdf |= pci_dev_id(adev->pdev);
+ dev_info = amdgpu_acpi_get_dev(sbdf);
+ if (!dev_info)
+ return -ENOENT;
+
+ list_for_each_entry(xcc_info, &dev_info->xcc_list, list) {
+ if (xcc_info->phy_id == xcc_id) {
+ memcpy(numa_info, xcc_info->numa_info,
+ sizeof(*numa_info));
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * amdgpu_acpi_event - handle notify events
+ *
+ * @nb: notifier block
+ * @val: notifier value (unused)
+ * @data: acpi event
+ *
+ * Calls relevant amdgpu functions in response to various
+ * acpi events.
+ * Returns NOTIFY code
+ */
+static int amdgpu_acpi_event(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, acpi_nb);
+ struct acpi_bus_event *entry = (struct acpi_bus_event *)data;
+
+ if (strcmp(entry->device_class, ACPI_AC_CLASS) == 0) {
+ if (power_supply_is_system_supplied() > 0)
+ DRM_DEBUG_DRIVER("pm: AC\n");
+ else
+ DRM_DEBUG_DRIVER("pm: DC\n");
+
+ amdgpu_pm_acpi_event_handler(adev);
+ }
+
+ /* Check for pending SBIOS requests */
+ return amdgpu_atif_handler(adev, entry);
+}
+
+/* Call all ACPI methods here */
+/**
+ * amdgpu_acpi_init - init driver acpi support
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Verifies the AMD ACPI interfaces and registers with the acpi
+ * notifier chain (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_acpi_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+
+ if (atif->notifications.brightness_change) {
+ if (adev->dc_enabled) {
+#if defined(CONFIG_DRM_AMD_DC)
+ struct amdgpu_display_manager *dm = &adev->dm;
+
+ if (dm->backlight_dev[0])
+ atif->bd = dm->backlight_dev[0];
+#endif
+ } else {
+ struct drm_encoder *tmp;
+
+ /* Find the encoder controlling the brightness */
+ list_for_each_entry(tmp, &adev_to_drm(adev)->mode_config.encoder_list,
+ head) {
+ struct amdgpu_encoder *enc = to_amdgpu_encoder(tmp);
+
+ if ((enc->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
+ enc->enc_priv) {
+ struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
+
+ if (dig->bl_dev) {
+ atif->bd = dig->bl_dev;
+ break;
+ }
+ }
+ }
+ }
+ }
+ adev->acpi_nb.notifier_call = amdgpu_acpi_event;
+ register_acpi_notifier(&adev->acpi_nb);
+
+ return 0;
+}
+
+void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+
+ memcpy(caps, &atif->backlight_caps, sizeof(*caps));
+}
+
+/**
+ * amdgpu_acpi_fini - tear down driver acpi support
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Unregisters with the acpi notifier chain (all asics).
+ */
+void amdgpu_acpi_fini(struct amdgpu_device *adev)
+{
+ unregister_acpi_notifier(&adev->acpi_nb);
+}
+
+/**
+ * amdgpu_atif_pci_probe_handle - look up the ATIF handle
+ *
+ * @pdev: pci device
+ *
+ * Look up the ATIF handles (all asics).
+ * Returns true if the handle is found, false if not.
+ */
+static bool amdgpu_atif_pci_probe_handle(struct pci_dev *pdev)
+{
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+ acpi_handle dhandle, atif_handle;
+ acpi_status status;
+ int ret;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ return false;
+
+ status = acpi_get_handle(dhandle, "ATIF", &atif_handle);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ amdgpu_acpi_priv.atif.handle = atif_handle;
+ acpi_get_name(amdgpu_acpi_priv.atif.handle, ACPI_FULL_PATHNAME, &buffer);
+ DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name);
+ ret = amdgpu_atif_verify_interface(&amdgpu_acpi_priv.atif);
+ if (ret) {
+ amdgpu_acpi_priv.atif.handle = 0;
+ return false;
+ }
+ return true;
+}
+
+/**
+ * amdgpu_atcs_pci_probe_handle - look up the ATCS handle
+ *
+ * @pdev: pci device
+ *
+ * Look up the ATCS handles (all asics).
+ * Returns true if the handle is found, false if not.
+ */
+static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
+{
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name };
+ acpi_handle dhandle, atcs_handle;
+ acpi_status status;
+ int ret;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ return false;
+
+ status = acpi_get_handle(dhandle, "ATCS", &atcs_handle);
+ if (ACPI_FAILURE(status))
+ return false;
+
+ amdgpu_acpi_priv.atcs.handle = atcs_handle;
+ acpi_get_name(amdgpu_acpi_priv.atcs.handle, ACPI_FULL_PATHNAME, &buffer);
+ DRM_DEBUG_DRIVER("Found ATCS handle %s\n", acpi_method_name);
+ ret = amdgpu_atcs_verify_interface(&amdgpu_acpi_priv.atcs);
+ if (ret) {
+ amdgpu_acpi_priv.atcs.handle = 0;
+ return false;
+ }
+ return true;
+}
+
+/**
+ * amdgpu_acpi_should_gpu_reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * returns true if should reset GPU, false if not
+ */
+bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
+{
+ if ((adev->flags & AMD_IS_APU) &&
+ adev->gfx.imu.funcs) /* No need to do mode2 reset for IMU-enabled APUs */
+ return false;
+
+ if ((adev->flags & AMD_IS_APU) &&
+ amdgpu_acpi_is_s3_active(adev))
+ return false;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+ return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
+#else
+ return true;
+#endif
+}
+
+/*
+ * amdgpu_acpi_detect - detect ACPI ATIF/ATCS methods
+ *
+ * Check if we have the ATIF/ATCS methods and populate
+ * the structures in the driver.
+ */
+void amdgpu_acpi_detect(void)
+{
+ struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
+ struct amdgpu_atcs *atcs = &amdgpu_acpi_priv.atcs;
+ struct pci_dev *pdev = NULL;
+ int ret;
+
+ while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
+ if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) &&
+ (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8))
+ continue;
+
+ if (!atif->handle)
+ amdgpu_atif_pci_probe_handle(pdev);
+ if (!atcs->handle)
+ amdgpu_atcs_pci_probe_handle(pdev);
+ }
+
+ if (atif->functions.sbios_requests && !atif->functions.system_params) {
+ /* XXX check this workaround, if sbios request function is
+ * present we have to see how it's configured in the system
+ * params
+ */
+ atif->functions.system_params = true;
+ }
+
+ if (atif->functions.system_params) {
+ ret = amdgpu_atif_get_notification_params(atif);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Call to GET_SYSTEM_PARAMS failed: %d\n",
+ ret);
+ /* Disable notification */
+ atif->notification_cfg.enabled = false;
+ }
+ }
+
+ if (atif->functions.query_backlight_transfer_characteristics) {
+ ret = amdgpu_atif_query_backlight_caps(atif);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n",
+ ret);
+ atif->backlight_caps.caps_valid = false;
+ }
+ } else {
+ atif->backlight_caps.caps_valid = false;
+ }
+
+ amdgpu_acpi_enumerate_xcc();
+}
+
+void amdgpu_acpi_release(void)
+{
+ struct amdgpu_acpi_dev_info *dev_info, *dev_tmp;
+ struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp;
+ struct amdgpu_numa_info *numa_info;
+ unsigned long index;
+
+ xa_for_each(&numa_info_xa, index, numa_info) {
+ kfree(numa_info);
+ xa_erase(&numa_info_xa, index);
+ }
+
+ if (list_empty(&amdgpu_acpi_dev_list))
+ return;
+
+ list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list,
+ list) {
+ list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list,
+ list) {
+ list_del(&xcc_info->list);
+ kfree(xcc_info);
+ }
+
+ list_del(&dev_info->list);
+ kfree(dev_info);
+ }
+}
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+/**
+ * amdgpu_acpi_is_s3_active
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev)
+{
+ return !(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state == PM_SUSPEND_MEM);
+}
+
+/**
+ * amdgpu_acpi_is_s0ix_active
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * returns true if supported, false if not.
+ */
+bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
+{
+ if (!(adev->flags & AMD_IS_APU) ||
+ (pm_suspend_target_state != PM_SUSPEND_TO_IDLE))
+ return false;
+
+ if (adev->asic_type < CHIP_RAVEN)
+ return false;
+
+ if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
+ return false;
+
+ /*
+ * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally
+ * risky to do any special firmware-related preparations for entering
+ * S0ix even though the system is suspending to idle, so return false
+ * in that case.
+ */
+ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) {
+ dev_err_once(adev->dev,
+ "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n"
+ "To use suspend-to-idle change the sleep mode in BIOS setup.\n");
+ return false;
+ }
+
+#if !IS_ENABLED(CONFIG_AMD_PMC)
+ dev_err_once(adev->dev,
+ "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n");
+ return false;
+#else
+ return true;
+#endif /* CONFIG_AMD_PMC */
+}
+#endif /* CONFIG_SUSPEND */
+
+#if IS_ENABLED(CONFIG_DRM_AMD_ISP)
+static const struct acpi_device_id isp_sensor_ids[] = {
+ { "OMNI5C10" },
+ { }
+};
+
+static int isp_match_acpi_device_ids(struct device *dev, const void *data)
+{
+ return acpi_match_device(data, dev) ? 1 : 0;
+}
+
+int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev)
+{
+ struct device *pdev __free(put_device) = NULL;
+ struct acpi_device *acpi_pdev;
+
+ pdev = bus_find_device(&platform_bus_type, NULL, isp_sensor_ids,
+ isp_match_acpi_device_ids);
+ if (!pdev)
+ return -EINVAL;
+
+ acpi_pdev = ACPI_COMPANION(pdev);
+ if (!acpi_pdev)
+ return -ENODEV;
+
+ *dev = acpi_pdev;
+
+ return 0;
+}
+#endif /* CONFIG_DRM_AMD_ISP */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
new file mode 100644
index 000000000000..80771b1480ff
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Christian König.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#include <linux/hdmi.h>
+#include <linux/gcd.h>
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+
+static const struct amdgpu_afmt_acr amdgpu_afmt_predefined_acr[] = {
+ /* 32kHz 44.1kHz 48kHz */
+ /* Clock N CTS N CTS N CTS */
+ { 25175, 4096, 25175, 28224, 125875, 6144, 25175 }, /* 25.20/1.001 MHz */
+ { 25200, 4096, 25200, 6272, 28000, 6144, 25200 }, /* 25.20 MHz */
+ { 27000, 4096, 27000, 6272, 30000, 6144, 27000 }, /* 27.00 MHz */
+ { 27027, 4096, 27027, 6272, 30030, 6144, 27027 }, /* 27.00*1.001 MHz */
+ { 54000, 4096, 54000, 6272, 60000, 6144, 54000 }, /* 54.00 MHz */
+ { 54054, 4096, 54054, 6272, 60060, 6144, 54054 }, /* 54.00*1.001 MHz */
+ { 74176, 4096, 74176, 5733, 75335, 6144, 74176 }, /* 74.25/1.001 MHz */
+ { 74250, 4096, 74250, 6272, 82500, 6144, 74250 }, /* 74.25 MHz */
+ { 148352, 4096, 148352, 5733, 150670, 6144, 148352 }, /* 148.50/1.001 MHz */
+ { 148500, 4096, 148500, 6272, 165000, 6144, 148500 }, /* 148.50 MHz */
+};
+
+
+/*
+ * calculate CTS and N values if they are not found in the table
+ */
+static void amdgpu_afmt_calc_cts(uint32_t clock, int *CTS, int *N, int freq)
+{
+ int n, cts;
+ unsigned long div, mul;
+
+ /* Safe, but overly large values */
+ n = 128 * freq;
+ cts = clock * 1000;
+
+ /* Smallest valid fraction */
+ div = gcd(n, cts);
+
+ n /= div;
+ cts /= div;
+
+ /*
+ * The optimal N is 128*freq/1000. Calculate the closest larger
+ * value that doesn't truncate any bits.
+ */
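+ /*
+ * Example: clock = 25175 (kHz) with freq = 48000 reduces to n = 6144,
+ * cts = 25175 and mul = 1, matching the 48 kHz entry in the table above.
+ */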
+ mul = ((128*freq/1000) + (n-1))/n;
+
+ n *= mul;
+ cts *= mul;
+
+ /* Check that we are in spec (not always possible) */
+ if (n < (128*freq/1500))
+ pr_warn("Calculated ACR N value is too small. You may experience audio problems.\n");
+ if (n > (128*freq/300))
+ pr_warn("Calculated ACR N value is too large. You may experience audio problems.\n");
+
+ *N = n;
+ *CTS = cts;
+
+ DRM_DEBUG("Calculated ACR timing N=%d CTS=%d for frequency %d\n",
+ *N, *CTS, freq);
+}
+
+struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
+{
+ struct amdgpu_afmt_acr res;
+ u8 i;
+
+ /* Precalculated values for common clocks */
+ for (i = 0; i < ARRAY_SIZE(amdgpu_afmt_predefined_acr); i++) {
+ if (amdgpu_afmt_predefined_acr[i].clock == clock)
+ return amdgpu_afmt_predefined_acr[i];
+ }
+
+ /* And odd clocks get manually calculated */
+ amdgpu_afmt_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000);
+ amdgpu_afmt_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100);
+ amdgpu_afmt_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000);
+ res.clock = clock;
+
+ return res;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
new file mode 100644
index 000000000000..a2879d2b7c8e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -0,0 +1,912 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu_amdkfd.h"
+#include "amd_pcie.h"
+#include "amd_shared.h"
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_dma_buf.h"
+#include <drm/ttm/ttm_tt.h>
+#include <linux/module.h>
+#include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
+#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
+
+/* Total memory size in system memory and all GPU VRAM. Used to
+ * estimate worst case amount of memory to reserve for page tables
+ */
+uint64_t amdgpu_amdkfd_total_mem_size;
+
+static bool kfd_initialized;
+
+int amdgpu_amdkfd_init(void)
+{
+ struct sysinfo si;
+ int ret;
+
+ si_meminfo(&si);
+ amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh;
+ amdgpu_amdkfd_total_mem_size *= si.mem_unit;
+
+ ret = kgd2kfd_init();
+ kfd_initialized = !ret;
+
+ return ret;
+}
+
+void amdgpu_amdkfd_fini(void)
+{
+ if (kfd_initialized) {
+ kgd2kfd_exit();
+ kfd_initialized = false;
+ }
+}
+
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
+{
+ bool vf = amdgpu_sriov_vf(adev);
+
+ if (!kfd_initialized)
+ return;
+
+ adev->kfd.dev = kgd2kfd_probe(adev, vf);
+}
+
+/**
+ * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
+ * setup amdkfd
+ *
+ * @adev: amdgpu_device pointer
+ * @aperture_base: output returning doorbell aperture base physical address
+ * @aperture_size: output returning doorbell aperture size in bytes
+ * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
+ *
+ * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
+ * takes doorbells required for its own rings and reports the setup to amdkfd.
+ * amdgpu reserved doorbells are at the start of the doorbell aperture.
+ */
+static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
+ phys_addr_t *aperture_base,
+ size_t *aperture_size,
+ size_t *start_offset)
+{
+ /*
+ * The first num_kernel_doorbells are used by amdgpu.
+ * amdkfd takes whatever's left in the aperture.
+ */
+ if (adev->enable_mes) {
+ /*
+ * With MES enabled, we only need to initialize
+ * the base address. The size and offset are
+ * not initialized as AMDGPU manages the whole
+ * doorbell space.
+ */
+ *aperture_base = adev->doorbell.base;
+ *aperture_size = 0;
+ *start_offset = 0;
+ } else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
+ sizeof(u32)) {
+ *aperture_base = adev->doorbell.base;
+ *aperture_size = adev->doorbell.size;
+ *start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
+ } else {
+ *aperture_base = 0;
+ *aperture_size = 0;
+ *start_offset = 0;
+ }
+}
+
+
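+/* Worker scheduled by amdgpu_amdkfd_gpu_reset() to run a full GPU recovery */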
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ kfd.reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = adev->enable_mes ?
+ AMDGPU_RESET_SRC_MES :
+ AMDGPU_RESET_SRC_HWS;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
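+/* DRM client used by KFD for BO GEM handle allocations */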
+static const struct drm_client_funcs kfd_client_funcs = {
+ .unregister = drm_client_release,
+};
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!adev->kfd.init_complete || adev->kfd.client.dev)
+ return 0;
+
+ ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
+ &kfd_client_funcs);
+ if (ret) {
+ dev_err(adev->dev, "Failed to init DRM client: %d\n",
+ ret);
+ return ret;
+ }
+
+ drm_client_register(&adev->kfd.client);
+
+ return 0;
+}
+
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
+{
+ int i;
+ int last_valid_bit;
+
+ amdgpu_amdkfd_gpuvm_init_mem_limits();
+
+ if (adev->kfd.dev) {
+ struct kgd2kfd_shared_resources gpu_resources = {
+ .compute_vmid_bitmap =
+ ((1 << AMDGPU_NUM_VMID) - 1) -
+ ((1 << adev->vm_manager.first_kfd_vmid) - 1),
+ .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+ .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
+ .gpuvm_size = min(adev->vm_manager.max_pfn
+ << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GMC_HOLE_START),
+ .drm_render_minor = adev_to_drm(adev)->render->index,
+ .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+ .enable_mes = adev->enable_mes,
+ };
+
+ /* this is going to have a few of the MSBs set that we need to
+ * clear
+ */
+ bitmap_complement(gpu_resources.cp_queue_bitmap,
+ adev->gfx.mec_bitmap[0].queue_bitmap,
+ AMDGPU_MAX_QUEUES);
+
+ /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+ * nbits is not compile time constant
+ */
+ last_valid_bit = 1 /* only first MEC can have compute queues */
+ * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+ for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i)
+ clear_bit(i, gpu_resources.cp_queue_bitmap);
+
+ amdgpu_doorbell_get_kfd_info(adev,
+ &gpu_resources.doorbell_physical_address,
+ &gpu_resources.doorbell_aperture_size,
+ &gpu_resources.doorbell_start_offset);
+
+ /* Since SOC15, BIF starts to statically use the
+ * lower 12 bits of doorbell addresses for routing
+ * based on settings in registers like
+ * SDMA0_DOORBELL_RANGE etc..
+ * In order to route a doorbell to CP engine, the lower
+ * 12 bits of its address has to be outside the range
+ * set for SDMA, VCN, and IH blocks.
+ */
+ if (adev->asic_type >= CHIP_VEGA10) {
+ gpu_resources.non_cp_doorbells_start =
+ adev->doorbell_index.first_non_cp;
+ gpu_resources.non_cp_doorbells_end =
+ adev->doorbell_index.last_non_cp;
+ }
+
+ adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
+ &gpu_resources);
+
+ amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+
+ INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
+ }
+}
+
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
+{
+ if (adev->kfd.dev) {
+ kgd2kfd_device_exit(adev->kfd.dev);
+ adev->kfd.dev = NULL;
+ amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size;
+ }
+}
+
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
+ const void *ih_ring_entry)
+{
+ if (adev->kfd.dev)
+ kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
+}
+
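+/*
+ * During S0ix only the KFD scheduler is stopped on all nodes; otherwise
+ * kgd2kfd_suspend()/kgd2kfd_resume() handle the device and, optionally,
+ * its processes.
+ */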
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
+{
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
+ else
+ kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
+ }
+}
+
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
+{
+ int r = 0;
+
+ if (adev->kfd.dev) {
+ if (adev->in_s0ix)
+ r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
+ else
+ r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
+ }
+
+ return r;
+}
+
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev)
+{
+ if (adev->kfd.dev)
+ kgd2kfd_suspend_process(adev->kfd.dev);
+}
+
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->kfd.dev)
+ r = kgd2kfd_resume_process(adev->kfd.dev);
+
+ return r;
+}
+
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r = 0;
+
+ if (adev->kfd.dev)
+ r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);
+
+ return r;
+}
+
+int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->kfd.dev)
+ r = kgd2kfd_post_reset(adev->kfd.dev);
+
+ return r;
+}
+
+void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
+{
+ if (amdgpu_device_should_recover_gpu(adev))
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work);
+}
+
+int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
+ void **mem_obj, uint64_t *gpu_addr,
+ void **cpu_ptr, bool cp_mqd_gfx9)
+{
+ struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_param bp;
+ int r;
+ void *cpu_ptr_tmp = NULL;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ if (cp_mqd_gfx9)
+ bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
+
+ r = amdgpu_bo_create(adev, &bp, &bo);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate BO for amdkfd (%d)\n", r);
+ return r;
+ }
+
+ /* map the buffer */
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+ goto allocate_mem_reserve_bo_failed;
+ }
+
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+ goto allocate_mem_pin_bo_failed;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", bo);
+ goto allocate_mem_kmap_bo_failed;
+ }
+
+ r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) failed to map bo to kernel for amdkfd\n", r);
+ goto allocate_mem_kmap_bo_failed;
+ }
+
+ *mem_obj = bo;
+ *gpu_addr = amdgpu_bo_gpu_offset(bo);
+ *cpu_ptr = cpu_ptr_tmp;
+
+ amdgpu_bo_unreserve(bo);
+
+ return 0;
+
+allocate_mem_kmap_bo_failed:
+ amdgpu_bo_unpin(bo);
+allocate_mem_pin_bo_failed:
+ amdgpu_bo_unreserve(bo);
+allocate_mem_reserve_bo_failed:
+ amdgpu_bo_unref(&bo);
+
+ return r;
+}
+
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
+{
+ struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
+
+ if (!bo || !*bo)
+ return;
+
+ (void)amdgpu_bo_reserve(*bo, true);
+ amdgpu_bo_kunmap(*bo);
+ amdgpu_bo_unpin(*bo);
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
+}
+
+int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
+ void **mem_obj)
+{
+ struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_user *ubo;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = 1;
+ bp.domain = AMDGPU_GEM_DOMAIN_GWS;
+ bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ bp.type = ttm_bo_type_device;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create_user(adev, &bp, &ubo);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate gws BO for amdkfd (%d)\n", r);
+ return r;
+ }
+
+ bo = &ubo->bo;
+ *mem_obj = bo;
+ return 0;
+}
+
+void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
+{
+ struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
+
+ amdgpu_bo_unref(&bo);
+}
+
+uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
+ enum kgd_engine_type type)
+{
+ switch (type) {
+ case KGD_ENGINE_PFP:
+ return adev->gfx.pfp_fw_version;
+
+ case KGD_ENGINE_ME:
+ return adev->gfx.me_fw_version;
+
+ case KGD_ENGINE_CE:
+ return adev->gfx.ce_fw_version;
+
+ case KGD_ENGINE_MEC1:
+ return adev->gfx.mec_fw_version;
+
+ case KGD_ENGINE_MEC2:
+ return adev->gfx.mec2_fw_version;
+
+ case KGD_ENGINE_RLC:
+ return adev->gfx.rlc_fw_version;
+
+ case KGD_ENGINE_SDMA1:
+ return adev->sdma.instance[0].fw_version;
+
+ case KGD_ENGINE_SDMA2:
+ return adev->sdma.instance[1].fw_version;
+
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp)
+{
+ memset(mem_info, 0, sizeof(*mem_info));
+
+ if (xcp) {
+ if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size)
+ mem_info->local_mem_size_public =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ else
+ mem_info->local_mem_size_private =
+ KFD_XCP_MEMORY_SIZE(adev, xcp->id);
+ } else if (adev->apu_prefer_gtt) {
+ mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
+ mem_info->local_mem_size_private = 0;
+ } else {
+ mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
+ mem_info->local_mem_size_private = adev->gmc.real_vram_size -
+ adev->gmc.visible_vram_size;
+ }
+ mem_info->vram_width = adev->gmc.vram_width;
+
+ pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",
+ &adev->gmc.aper_base,
+ mem_info->local_mem_size_public,
+ mem_info->local_mem_size_private);
+
+ if (adev->pm.dpm_enabled) {
+ if (amdgpu_emu_mode == 1)
+ mem_info->mem_clk_max = 0;
+ else
+ mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
+ } else
+ mem_info->mem_clk_max = 100;
+}
+
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ if (adev->gfx.funcs->get_gpu_clock_counter)
+ return adev->gfx.funcs->get_gpu_clock_counter(adev);
+ return 0;
+}
+
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
+{
+ /* the sclk is in quantas of 10kHz */
+ if (adev->pm.dpm_enabled)
+ return amdgpu_dpm_get_sclk(adev, false) / 100;
+ else
+ return 100;
+}
+
+int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
+ uint64_t *bo_size, void *metadata_buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint32_t *flags, int8_t *xcp_id)
+{
+ struct dma_buf *dma_buf;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *bo;
+ uint64_t metadata_flags;
+ int r = -EINVAL;
+
+ dma_buf = dma_buf_get(dma_buf_fd);
+ if (IS_ERR(dma_buf))
+ return PTR_ERR(dma_buf);
+
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
+ /* Can't handle non-graphics buffers */
+ goto out_put;
+
+ obj = dma_buf->priv;
+ if (obj->dev->driver != adev_to_drm(adev)->driver)
+ /* Can't handle buffers from different drivers */
+ goto out_put;
+
+ adev = drm_to_adev(obj->dev);
+ bo = gem_to_amdgpu_bo(obj);
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT)))
+ /* Only VRAM and GTT BOs are supported */
+ goto out_put;
+
+ r = 0;
+ if (dmabuf_adev)
+ *dmabuf_adev = adev;
+ if (bo_size)
+ *bo_size = amdgpu_bo_size(bo);
+ if (metadata_buffer)
+ r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
+ metadata_size, &metadata_flags);
+ if (flags) {
+ *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM
+ : KFD_IOC_ALLOC_MEM_FLAGS_GTT;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+ *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
+ }
+ if (xcp_id)
+ *xcp_id = bo->xcp_id;
+
+out_put:
+ dma_buf_put(dma_buf);
+ return r;
+}
+
+int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
+{
+ int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
+ fls(adev->pm.pcie_mlw_mask)) - 1;
+ int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask &
+ CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) :
+ fls(adev->pm.pcie_gen_mask &
+ CAIL_PCIE_LINK_SPEED_SUPPORT_MASK)) - 1;
+ uint32_t num_lanes_mask = 1 << num_lanes_shift;
+ uint32_t gen_speed_mask = 1 << gen_speed_shift;
+ int num_lanes_factor = 0, gen_speed_mbits_factor = 0;
+
+ switch (num_lanes_mask) {
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X1:
+ num_lanes_factor = 1;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X2:
+ num_lanes_factor = 2;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X4:
+ num_lanes_factor = 4;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X8:
+ num_lanes_factor = 8;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X12:
+ num_lanes_factor = 12;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X16:
+ num_lanes_factor = 16;
+ break;
+ case CAIL_PCIE_LINK_WIDTH_SUPPORT_X32:
+ num_lanes_factor = 32;
+ break;
+ }
+
+ switch (gen_speed_mask) {
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1:
+ gen_speed_mbits_factor = 2500;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2:
+ gen_speed_mbits_factor = 5000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3:
+ gen_speed_mbits_factor = 8000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4:
+ gen_speed_mbits_factor = 16000;
+ break;
+ case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5:
+ gen_speed_mbits_factor = 32000;
+ break;
+ }
+
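+ /* Raw link bandwidth in MB/s: lanes * per-lane rate (Mbit/s) / 8 */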
+ return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE;
+}
+
+int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
+ enum kgd_engine_type engine,
+ uint32_t vmid, uint64_t gpu_addr,
+ uint32_t *ib_cmd, uint32_t ib_len)
+{
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct amdgpu_ring *ring;
+ struct dma_fence *f = NULL;
+ int ret;
+
+ switch (engine) {
+ case KGD_ENGINE_MEC1:
+ ring = &adev->gfx.compute_ring[0];
+ break;
+ case KGD_ENGINE_SDMA1:
+ ring = &adev->sdma.instance[0].ring;
+ break;
+ case KGD_ENGINE_SDMA2:
+ ring = &adev->sdma.instance[1].ring;
+ break;
+ default:
+ pr_err("Invalid engine in IB submission: %d\n", engine);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0);
+ if (ret)
+ goto err;
+
+ ib = &job->ibs[0];
+ memset(ib, 0, sizeof(struct amdgpu_ib));
+
+ ib->gpu_addr = gpu_addr;
+ ib->ptr = ib_cmd;
+ ib->length_dw = ib_len;
+ /* This works for NO_HWS. TODO: need to handle without knowing VMID */
+ job->vmid = vmid;
+ job->num_ibs = 1;
+
+ ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
+
+ if (ret) {
+ DRM_ERROR("amdgpu: failed to schedule IB.\n");
+ goto err_ib_sched;
+ }
+
+ /* Drop the initial kref_init count (see drm_sched_main as example) */
+ dma_fence_put(f);
+ ret = dma_fence_wait(f, false);
+
+err_ib_sched:
+ amdgpu_job_free(job);
+err:
+ return ret;
+}
+
+void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
+{
+ enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
+
+ if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+ ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
+ (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
+ pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
+ amdgpu_gfx_off_ctrl(adev, idle);
+ } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
+ (adev->flags & AMD_IS_APU)) {
+ /* Disable GFXOFF and PG. Temporary workaround
+ * to fix some compute applications issue on GFX9.
+ */
+ struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (gfx_block != NULL)
+ gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
+ }
+ amdgpu_dpm_switch_power_profile(adev,
+ PP_SMC_POWER_PROFILE_COMPUTE,
+ !idle);
+}
+
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
+{
+ if (adev->kfd.dev)
+ return vmid >= adev->vm_manager.first_kfd_vmid;
+
+ return false;
+}
+
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
+{
+ return adev->have_atomics_support;
+}
+
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
+{
+ amdgpu_device_flush_hdp(adev, NULL);
+}
+
+bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
+{
+ return amdgpu_ras_get_fed_status(adev);
+}
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
+}
+
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset)
+{
+ amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
+}
+
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload)
+{
+ int ret;
+
+ /* Device or IH ring is not ready so bail. */
+ ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih);
+ if (ret)
+ return ret;
+
+ /* Send payload to fence KFD interrupts */
+ amdgpu_amdkfd_interrupt(adev, payload);
+
+ return 0;
+}
+
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
+{
+ return kgd2kfd_check_and_lock_kfd(adev->kfd.dev);
+}
+
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev)
+{
+ kgd2kfd_unlock_kfd(adev->kfd.dev);
+}
+
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
+{
+ s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id);
+ u64 tmp;
+
+ if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) {
+ if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) {
+ /* In NPS1 mode, restrict the reported VRAM to the
+ * ttm_pages_limit, which defaults to 1/2 of system
+ * memory. In the other partition modes the HBM is already
+ * divided uniformly per reported NUMA node. Users who want
+ * to go beyond the default ttm limit and maximize ROCm
+ * allocations can raise the ttm and sysmem limits.
+ */
+
+ tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes();
+ } else {
+ tmp = adev->gmc.mem_partitions[mem_id].size;
+ }
+ do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
+ return ALIGN_DOWN(tmp, PAGE_SIZE);
+ } else if (adev->apu_prefer_gtt) {
+ return (ttm_tt_pages_limit() << PAGE_SHIFT);
+ } else {
+ return adev->gmc.real_vram_size;
+ }
+}
+
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+ u32 inst)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ struct amdgpu_ring_funcs *ring_funcs;
+ struct amdgpu_ring *ring;
+ int r = 0;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
+ if (!ring_funcs)
+ return -ENOMEM;
+
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring) {
+ r = -ENOMEM;
+ goto free_ring_funcs;
+ }
+
+ ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE;
+ ring->doorbell_index = doorbell_off;
+ ring->funcs = ring_funcs;
+
+ spin_lock(&kiq->ring_lock);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock(&kiq->ring_lock);
+ r = -ENOMEM;
+ goto free_ring;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
+
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * The ring test does a basic scratch register change check. Run it
+ * here to make sure the unmap-queues packet submitted above has been
+ * processed before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+
+ spin_unlock(&kiq->ring_lock);
+
+free_ring:
+ kfree(ring);
+
+free_ring_funcs:
+ kfree(ring_funcs);
+
+ return r;
+}
+
+/* Stop scheduling on KFD */
+int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ return kgd2kfd_stop_sched(adev->kfd.dev, node_id);
+}
+
+/* Start scheduling on KFD */
+int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ return kgd2kfd_start_sched(adev->kfd.dev, node_id);
+}
+
+/* check if there are KFD queues active */
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return false;
+
+ return kgd2kfd_compute_active(adev->kfd.dev, node_id);
+}
+
+/* Config CGTT_SQ_CLK_CTRL */
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable)
+{
+ int r;
+
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ r = psp_config_sq_perfmon(&adev->psp, xcp_id, core_override_enable,
+ reg_override_enable, perfmon_override_enable);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
new file mode 100644
index 000000000000..8bdfcde2029b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -0,0 +1,550 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */
+
+#ifndef AMDGPU_AMDKFD_H_INCLUDED
+#define AMDGPU_AMDKFD_H_INCLUDED
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
+#include <kgd_kfd_interface.h>
+#include <drm/drm_client.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_xcp.h"
+
+extern uint64_t amdgpu_amdkfd_total_mem_size;
+
+enum TLB_FLUSH_TYPE {
+ TLB_FLUSH_LEGACY = 0,
+ TLB_FLUSH_LIGHTWEIGHT,
+ TLB_FLUSH_HEAVYWEIGHT
+};
+
+struct amdgpu_device;
+struct kfd_process_device;
+struct amdgpu_reset_context;
+
+enum kfd_mem_attachment_type {
+ KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
+ KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */
+ KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */
+ KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */
+};
+
+struct kfd_mem_attachment {
+ struct list_head list;
+ enum kfd_mem_attachment_type type;
+ bool is_mapped;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_device *adev;
+ uint64_t va;
+ uint64_t pte_flags;
+};
+
+struct kgd_mem {
+ struct mutex lock;
+ struct amdgpu_bo *bo;
+ struct dma_buf *dmabuf;
+ struct amdgpu_hmm_range *range;
+ struct list_head attachments;
+ /* protected by amdkfd_process_info.lock */
+ struct list_head validate_list;
+ uint32_t domain;
+ unsigned int mapped_to_gpu_memory;
+ uint64_t va;
+
+ uint32_t alloc_flags;
+
+ uint32_t invalid;
+ struct amdkfd_process_info *process_info;
+
+ struct amdgpu_sync sync;
+
+ uint32_t gem_handle;
+ bool aql_queue;
+ bool is_imported;
+};
+
+/* KFD Memory Eviction */
+struct amdgpu_amdkfd_fence {
+ struct dma_fence base;
+ struct mm_struct *mm;
+ spinlock_t lock;
+ char timeline_name[TASK_COMM_LEN];
+ struct svm_range_bo *svm_bo;
+};
+
+struct amdgpu_kfd_dev {
+ struct kfd_dev *dev;
+ int64_t vram_used[MAX_XCP];
+ uint64_t vram_used_aligned[MAX_XCP];
+ bool init_complete;
+ struct work_struct reset_work;
+
+ /* Client for KFD BO GEM handle allocations */
+ struct drm_client_dev client;
+
+ /* HMM page migration MEMORY_DEVICE_PRIVATE mapping
+ * Must be last; it ends in a flexible-array member.
+ */
+ struct dev_pagemap pgmap;
+};
+
+enum kgd_engine_type {
+ KGD_ENGINE_PFP = 1,
+ KGD_ENGINE_ME,
+ KGD_ENGINE_CE,
+ KGD_ENGINE_MEC1,
+ KGD_ENGINE_MEC2,
+ KGD_ENGINE_RLC,
+ KGD_ENGINE_SDMA1,
+ KGD_ENGINE_SDMA2,
+ KGD_ENGINE_MAX
+};
+
+
+struct amdkfd_process_info {
+ /* List head of all VMs that belong to a KFD process */
+ struct list_head vm_list_head;
+ /* List head for all KFD BOs that belong to a KFD process. */
+ struct list_head kfd_bo_list;
+ /* List of userptr BOs that are valid or invalid */
+ struct list_head userptr_valid_list;
+ struct list_head userptr_inval_list;
+ /* Lock to protect kfd_bo_list */
+ struct mutex lock;
+
+ /* Number of VMs */
+ unsigned int n_vms;
+ /* Eviction Fence */
+ struct amdgpu_amdkfd_fence *eviction_fence;
+
+ /* MMU-notifier related fields */
+ struct mutex notifier_lock;
+ uint32_t evicted_bos;
+ struct delayed_work restore_userptr_work;
+ struct pid *pid;
+ bool block_mmu_notifications;
+};
+
+int amdgpu_amdkfd_init(void);
+void amdgpu_amdkfd_fini(void);
+
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc);
+void amdgpu_amdkfd_suspend_process(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume_process(struct amdgpu_device *adev);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
+ const void *ih_ring_entry);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
+int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev);
+void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev);
+int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
+ enum kgd_engine_type engine,
+ uint32_t vmid, uint64_t gpu_addr,
+ uint32_t *ib_cmd, uint32_t ib_len);
+void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
+bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
+
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
+
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
+
+void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev);
+
+int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
+ int queue_bit);
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+ struct mm_struct *mm,
+ struct svm_range_bo *svm_bo);
+
+int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
+#if defined(CONFIG_DEBUG_FS)
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
+#endif
+#if IS_ENABLED(CONFIG_HSA_AMD)
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem);
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence);
+#else
+static inline
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+ return false;
+}
+
+static inline
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+ return NULL;
+}
+
+static inline
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
+{
+}
+
+static inline
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
+{
+ return 0;
+}
+static inline
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
+{
+ return 0;
+}
+#endif
+/* Shared API */
+int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
+ void **mem_obj, uint64_t *gpu_addr,
+ void **cpu_ptr, bool mqd_gfx9);
+void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj);
+int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
+ void **mem_obj);
+void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
+uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev,
+ enum kgd_engine_type type);
+void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ struct kfd_local_mem_info *mem_info,
+ struct amdgpu_xcp *xcp);
+uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
+
+uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
+int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
+ uint64_t *bo_size, void *metadata_buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint32_t *flags, int8_t *xcp_id);
+int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
+int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
+ uint32_t *payload);
+int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
+ u32 inst);
+int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id);
+
+
+/* Read user wptr from a specified user address space with page fault
+ * disabled. The memory must be pinned and mapped to the hardware when
+ * this is called in hqd_load functions, so it should never fault in
+ * the first place. This resolves a circular lock dependency involving
+ * four locks, including the DQM lock and mmap_lock.
+ */
+#define read_user_wptr(mmptr, wptr, dst) \
+ ({ \
+ bool valid = false; \
+ if ((mmptr) && (wptr)) { \
+ pagefault_disable(); \
+ if ((mmptr) == current->mm) { \
+ valid = !get_user((dst), (wptr)); \
+ } else if (current->flags & PF_KTHREAD) { \
+ kthread_use_mm(mmptr); \
+ valid = !get_user((dst), (wptr)); \
+ kthread_unuse_mm(mmptr); \
+ } \
+ pagefault_enable(); \
+ } \
+ valid; \
+ })
+
+/* GPUVM API */
+#define drm_priv_to_vm(drm_priv) \
+ (&((struct amdgpu_fpriv *) \
+ ((struct drm_file *)(drm_priv))->driver_priv)->vm)
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm,
+ void **process_info,
+ struct dma_fence **ef);
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id);
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ struct amdgpu_device *adev, uint64_t va, uint64_t size,
+ void *drm_priv, struct kgd_mem **mem,
+ uint64_t *offset, uint32_t flags, bool criu_resume);
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
+ uint64_t *size);
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
+ struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_sync_memory(
+ struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size);
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
+
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart);
+
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
+ struct dma_fence __rcu **ef);
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
+ struct kfd_vm_fault_info *info);
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset);
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+ struct dma_buf **dmabuf);
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
+ struct tile_config *config);
+void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
+bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem);
+void amdgpu_amdkfd_block_mmu_notifications(void *p);
+int amdgpu_amdkfd_criu_resume(void *p);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id);
+
+u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id);
+
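+/* Memory partition id for a given XCP, or -1 when there is no XCP manager or the id is invalid */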
+#define KFD_XCP_MEM_ID(adev, xcp_id) \
+ ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
+ (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1)
+
+#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id))
+
+
+#if IS_ENABLED(CONFIG_HSA_AMD)
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+
+/**
+ * amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
+ *
+ * Allows KFD to release its resources associated with the GEM object.
+ */
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
+#else
+static inline
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+}
+
+static inline
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+}
+
+static inline
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
+{
+}
+#endif
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev);
+#else
+static inline
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
+{
+ return 0;
+}
+#endif
+
+/* KGD2KFD callbacks */
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
+int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
+ struct dma_fence *fence);
+#if IS_ENABLED(CONFIG_HSA_AMD)
+int kgd2kfd_init(void);
+void kgd2kfd_exit(void);
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ const struct kgd2kfd_shared_resources *gpu_resources);
+void kgd2kfd_device_exit(struct kfd_dev *kfd);
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc);
+int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc);
+void kgd2kfd_suspend_process(struct kfd_dev *kfd);
+int kgd2kfd_resume_process(struct kfd_dev *kfd);
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context);
+int kgd2kfd_post_reset(struct kfd_dev *kfd);
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
+int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd);
+void kgd2kfd_unlock_kfd(struct kfd_dev *kfd);
+int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd);
+int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault);
+
+#else
+static inline int kgd2kfd_init(void)
+{
+ return -ENOENT;
+}
+
+static inline void kgd2kfd_exit(void)
+{
+}
+
+static inline
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
+{
+ return NULL;
+}
+
+static inline
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ const struct kgd2kfd_shared_resources *gpu_resources)
+{
+ return false;
+}
+
+static inline void kgd2kfd_device_exit(struct kfd_dev *kfd)
+{
+}
+
+static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc)
+{
+}
+
+static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc)
+{
+ return 0;
+}
+
+static inline void kgd2kfd_suspend_process(struct kfd_dev *kfd)
+{
+}
+
+static inline int kgd2kfd_resume_process(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_post_reset(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
+{
+}
+
+static inline
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+}
+
+static inline
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
+{
+}
+
+static inline int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
+{
+}
+
+static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return 0;
+}
+
+static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+ return 0;
+}
+
+static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return false;
+}
+
+static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ return false;
+}
+
+#endif
+#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
new file mode 100644
index 000000000000..7e9f7a280c1b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
+#include "gc/gc_9_4_2_offset.h"
+#include "gc/gc_9_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here, but it is a general interface
+ * requirement for devices that support GFXOFF and whose RLC save/restore list
+ * does not cover the debug registers, i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_aldebaran_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+const struct kfd2kgd_calls aldebaran_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+ .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
new file mode 100644
index 000000000000..a7bdaf8d82dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
new file mode 100644
index 000000000000..1105a09e55dc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
+#include "amdgpu_reset.h"
+#include "sdma0/sdma0_4_2_2_offset.h"
+#include "sdma0/sdma0_4_2_2_sh_mask.h"
+#include "sdma1/sdma1_4_2_2_offset.h"
+#include "sdma1/sdma1_4_2_2_sh_mask.h"
+#include "sdma2/sdma2_4_2_2_offset.h"
+#include "sdma2/sdma2_4_2_2_sh_mask.h"
+#include "sdma3/sdma3_4_2_2_offset.h"
+#include "sdma3/sdma3_4_2_2_sh_mask.h"
+#include "sdma4/sdma4_4_2_2_offset.h"
+#include "sdma4/sdma4_4_2_2_sh_mask.h"
+#include "sdma5/sdma5_4_2_2_offset.h"
+#include "sdma5/sdma5_4_2_2_sh_mask.h"
+#include "sdma6/sdma6_4_2_2_offset.h"
+#include "sdma6/sdma6_4_2_2_sh_mask.h"
+#include "sdma7/sdma7_4_2_2_offset.h"
+#include "sdma7/sdma7_4_2_2_sh_mask.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+
+#define HQD_N_REGS 56
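+/* Record one register (byte offset and current value) in the caller's dump
+ * array, bounded by HQD_N_REGS.
+ */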
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ fallthrough;
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+ break;
+ case 2:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+ break;
+ case 3:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+ break;
+ case 4:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+ mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+ break;
+ case 5:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+ mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+ break;
+ case 6:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+ mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+ break;
+ case 7:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+ mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
+ break;
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
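+/*
+ * The four addends appear to mirror the four register ranges dumped below
+ * (RB_CNTL..DOORBELL, STATUS..CSA_ADDR_HI, IB_SUB_REMAIN..MINOR_PTR_UPDATE,
+ * MIDCMD_DATA0..MIDCMD_CNTL); the WARN_ON_ONCE() at the end checks that the
+ * total matches.
+ */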
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
+ void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+/*
+ * Helper used to suspend/resume the gfx pipe so that barrier behaviour can be
+ * changed for image post-process work.
+ */
+static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+
+ /* stop the scheduler and drain the ring. */
+ if (suspend) {
+ drm_sched_stop(&ring->sched, NULL);
+ r = amdgpu_fence_wait_empty(ring);
+ if (r)
+ goto out;
+ } else {
+ drm_sched_start(&ring->sched, 0);
+ }
+ }
+
+out:
+ /* return on resume or failure to drain rings. */
+ if (!suspend || r)
+ return r;
+
+ return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX);
+}
+
+static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt)
+{
+ uint32_t data;
+
+ WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt);
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ amdgpu_amdkfd_suspend(adev, true);
+
+ if (suspend_resume_compute_scheduler(adev, true))
+ goto out;
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG));
+ data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+ !enable_waitcnt);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data);
+
+out:
+ suspend_resume_compute_scheduler(adev, false);
+
+ amdgpu_amdkfd_resume(adev, true);
+
+ up_read(&adev->reset_domain->sem);
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging, i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ set_barrier_auto_waitcnt(adev, false);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+const struct kfd2kgd_calls arcturus_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_arcturus_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base =
+ kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_arcturus_enable_debug_trap,
+ .disable_debug_trap = kgd_arcturus_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
new file mode 100644
index 000000000000..756c1a5679c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+int kgd_arcturus_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+bool kgd_arcturus_hqd_sdma_is_occupied(struct amdgpu_device *adev,
+ void *mqd);
+int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
new file mode 100644
index 000000000000..1ef758ac5076
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/stacktrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
+
+static const struct dma_fence_ops amdkfd_fence_ops;
+static atomic_t fence_seq = ATOMIC_INIT(0);
+
+/* Eviction Fence
+ * Fence helper functions to deal with KFD memory eviction.
+ * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
+ * evicted unless all the user queues for that process are evicted.
+ *
+ * All the BOs in a process share an eviction fence. When process X wants
+ * to map VRAM memory but TTM can't find enough space, TTM will attempt to
+ * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
+ * by calling ttm_device_funcs->eviction_valuable().
+ *
+ * ttm_device_funcs->eviction_valuable() - will return false if the BO belongs
+ * to process X. Otherwise, it will return true to indicate BO can be
+ * evicted by TTM.
+ *
+ * If ttm_device_funcs->eviction_valuable returns true, then TTM will continue
+ * the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
+ * --> amdgpu_copy_buffer(). This sets up a job in the GPU scheduler.
+ *
+ * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
+ * notify when the BO is free to move. fence_add_callback --> enable_signaling
+ * --> amdgpu_amdkfd_fence.enable_signaling
+ *
+ * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
+ * user queues and signal the fence. The work item will also start another
+ * delayed work item to restore the BOs.
+ */
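+
+/*
+ * Illustrative note (not part of this change): in practice a KFD process
+ * creates one eviction fence via amdgpu_amdkfd_fence_create() and that fence
+ * is attached to the reservation objects of all of the process' BOs, which
+ * is what allows eviction_valuable() to recognise them as belonging to the
+ * same process.
+ */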
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+ struct mm_struct *mm,
+ struct svm_range_bo *svm_bo)
+{
+ struct amdgpu_amdkfd_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (fence == NULL)
+ return NULL;
+
+ /* This reference gets released in amdkfd_fence_release */
+ mmgrab(mm);
+ fence->mm = mm;
+ get_task_comm(fence->timeline_name, current);
+ spin_lock_init(&fence->lock);
+ fence->svm_bo = svm_bo;
+ dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
+ context, atomic_inc_return(&fence_seq));
+
+ return fence;
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence;
+
+ if (!f)
+ return NULL;
+
+ fence = container_of(f, struct amdgpu_amdkfd_fence, base);
+ if (f->ops == &amdkfd_fence_ops)
+ return fence;
+
+ return NULL;
+}
+
+static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_amdkfd_fence";
+}
+
+static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ return fence->timeline_name;
+}
+
+/**
+ * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
+ * a KFD BO and schedules a job to move the BO.
+ * If the fence is already signaled, return true.
+ * If it is not, schedule an eviction work item for the KFD process.
+ *
+ * @f: dma_fence
+ */
+static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ if (!fence)
+ return false;
+
+ if (dma_fence_is_signaled(f))
+ return true;
+
+ if (!fence->svm_bo) {
+ if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+ return true;
+ } else {
+ if (!svm_range_schedule_evict_svm_bo(fence))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * amdkfd_fence_release - callback invoked when the fence can be freed
+ *
+ * @f: dma_fence
+ *
+ * This function is called when the reference count becomes zero.
+ * Drops the mm_struct reference and frees the fence via kfree_rcu().
+ */
+static void amdkfd_fence_release(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ /* Unconditionally signal the fence. The process is getting
+ * terminated.
+ */
+ if (WARN_ON(!fence))
+ return; /* Not an amdgpu_amdkfd_fence */
+
+ mmdrop(fence->mm);
+ kfree_rcu(f, rcu);
+}
+
+/**
+ * amdkfd_fence_check_mm - Check whether to prevent eviction of @f by @mm
+ *
+ * @f: [IN] fence
+ * @mm: [IN] mm that needs to be verified
+ *
+ * Check whether @mm is the same as the mm of fence @f; return TRUE if it is,
+ * FALSE otherwise.
+ * For SVM BOs, which support VRAM overcommitment, always return FALSE.
+ */
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ if (!fence)
+ return false;
+ else if (fence->mm == mm && !fence->svm_bo)
+ return true;
+
+ return false;
+}
+
+static const struct dma_fence_ops amdkfd_fence_ops = {
+ .get_driver_name = amdkfd_fence_get_driver_name,
+ .get_timeline_name = amdkfd_fence_get_timeline_name,
+ .enable_signaling = amdkfd_fence_enable_signaling,
+ .release = amdkfd_fence_release,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
new file mode 100644
index 000000000000..89a45a9218f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include "amdgpu_amdkfd_aldebaran.h"
+#include "gc/gc_9_4_3_offset.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+#include "athub/athub_1_8_0_offset.h"
+#include "athub/athub_1_8_0_sh_mask.h"
+#include "oss/osssys_4_4_2_offset.h"
+#include "oss/osssys_4_4_2_sh_mask.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "sdma/sdma_4_4_2_offset.h"
+#include "sdma/sdma_4_4_2_sh_mask.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base =
+ SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id),
+ regSDMA_RLC0_RB_CNTL) -
+ regSDMA_RLC0_RB_CNTL;
+ uint32_t retval = sdma_engine_reg_base +
+ queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
+ return retval;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+12)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_IB_SUB_REMAIN;
+ reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA_RLC0_MIDCMD_DATA0;
+ reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL);
+ temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) |
+ SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
+ u32 pasid, unsigned int vmid, uint32_t xcc_inst)
+{
+ unsigned long timeout;
+ unsigned int reg;
+ unsigned int phy_inst = GET_INST(GC, xcc_inst);
+ /* Every two XCCs share one AID */
+ unsigned int aid = phy_inst / 2;
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);
+
+ timeout = jiffies + msecs_to_jiffies(10);
+ while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid))) {
+ if (time_after(jiffies, timeout)) {
+ pr_err("Fail to program VMID-PASID mapping\n");
+ return -ETIME;
+ }
+ cpu_relax();
+ }
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX));
+ /* Every 4 indices form a cycle: the 1st is the AID, the 2nd and 3rd are
+ * XCDs, and the 4th is reserved. Therefore "aid * 4 + (phy_inst % 2) + 1"
+ * programs the _LUT for the XCC and "aid * 4" programs it for the AID that
+ * the XCC connects to.
+ */
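+ /*
+ * Worked example (illustrative values, not from the patch): with
+ * phy_inst = 3 this gives aid = 3 / 2 = 1, so the XCC LUT index is
+ * 1 * 4 + (3 % 2) + 1 = 6 and the AID LUT index is 1 * 4 = 4.
+ */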
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4 + (phy_inst % 2) + 1);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX),
+ aid * 4);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg);
+
+ return 0;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, hqd_end, data;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR);
+ hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
+
+ for (reg = hqd_base; reg <= hqd_end; reg++)
+ WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
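+ /*
+ * Worked example with illustrative numbers (not from the patch): for a
+ * QUEUE_SIZE field of 9, queue_size = 2 << 9 = 1024. With a saved RPTR
+ * of 100 and a saved WPTR_LO of 5000 (5000 & 1023 = 904, no wrap), the
+ * guess is 100 + (5000 & ~1023) = 4196; had WPTR_LO been 4146
+ * (4146 & 1023 = 50 < 100), a wrap is assumed and queue_size is added,
+ * giving 100 + 1024 + 4096 = 5220.
+ */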
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v9_4_3_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v9_4_3_validate_trap_override_request(
+ struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v9_4_3_set_wave_launch_trap_override(
+ struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v9_4_3_set_address_watch(
+ struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high, inst);
+
+ WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low, inst);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+static uint32_t kgd_gfx_v9_4_3_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ uint32_t reg_offset = get_sdma_rlc_reg_offset(adev, engine, queue);
+ uint32_t status = RREG32(regSDMA_RLC0_CONTEXT_STATUS + reg_offset);
+ uint32_t doorbell_off = RREG32(regSDMA_RLC0_DOORBELL_OFFSET + reg_offset);
+ bool is_active = !!REG_GET_FIELD(status, SDMA_RLC0_CONTEXT_STATUS, SELECTED);
+
+ return is_active ? doorbell_off >> 2 : 0;
+}
+
+const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_4_3_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base =
+ kgd_gfx_v9_set_vm_context_page_table_base,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings =
+ kgd_gfx_v9_program_trap_handler_settings,
+ .build_dequeue_wait_counts_packet_info =
+ kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
+ .validate_trap_override_request =
+ kgd_gfx_v9_4_3_validate_trap_override_request,
+ .set_wave_launch_trap_override =
+ kgd_gfx_v9_4_3_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_4_3_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
new file mode 100644
index 000000000000..0239114fb6c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -0,0 +1,1123 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "oss/osssys_5_0_0_offset.h"
+#include "oss/osssys_5_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v10_structs.h"
+#include "nv.h"
+#include "nvd.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ /* APE1 no longer exists on GFX9 and later ASICs */
+
+ unlock_srbm(adev);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);
+
+ pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+#if 0
+ /* TODO: uncomment this code when the hardware support is ready. */
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ pr_debug("ATHUB mapping update finished\n");
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+#endif
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("update mapping for IH block and mmhub");
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ return 0;
+}
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base[2] = {
+ SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+ /* On gfx10, mmSDMA1_xxx registers are defined NOT based
+ * on SDMA1 base address (dw 0x1860) but based on SDMA0
+ * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL
+ * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc
+ * below
+ */
+ SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
+ };
+
+ uint32_t retval = sdma_engine_reg_base[engine_id]
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, retval);
+
+ return retval;
+}
+
+#if 0
+static uint32_t get_watch_base_addr(struct amdgpu_device *adev)
+{
+ uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
+ mmTCP_WATCH0_ADDR_H;
+
+ pr_debug("kfd: reg watch base address: 0x%x\n", retval);
+
+ return retval;
+}
+#endif
+
+static inline struct v10_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v10_compute_mqd *)mqd;
+}
+
+static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v10_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v10_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uint64_t)wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ struct v10_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
+
+ return r;
+}
+
+static int kgd_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v10_compute_mqd *m = get_mqd(mqd);
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+#if 0
+ unsigned long flags;
+ int retry;
+#endif
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+#if 0 /* Is this still needed? */
+ /* Workaround: If IQ timer is active and the wait time is close to or
+ * equal to 0, dequeueing is not safe. Wait until either the wait time
+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+ * cleared before continuing. Also, ensure wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+#endif
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
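
get_atc_vmid_pasid_mapping_info() above unpacks a PASID and a valid flag from a single 32-bit mapping register. The stand-alone sketch below shows the same unpacking; the mask values are assumptions for illustration (16-bit PASID, valid flag in bit 31) rather than values quoted from the ATHUB headers.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative field layout only -- not taken from the asic headers. */
	#define EXAMPLE_PASID_MASK 0x0000ffffu	/* low 16 bits: PASID */
	#define EXAMPLE_VALID_MASK 0x80000000u	/* bit 31: mapping is valid */

	static bool decode_vmid_pasid_mapping(uint32_t value, uint16_t *p_pasid)
	{
		*p_pasid = value & EXAMPLE_PASID_MASK;
		return !!(value & EXAMPLE_VALID_MASK);
	}

	int main(void)
	{
		uint16_t pasid;
		bool valid = decode_vmid_pasid_mapping(0x80000123u, &pasid);

		printf("valid=%d pasid=0x%x\n", valid, pasid);	/* valid=1 pasid=0x123 */
		return 0;
	}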
+
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* SDMA is on gfxhub as well for Navi1* series */
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+/*
+ * GFX10 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~3500 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because current GFX10 chips cannot support multi-process debugging due to
+ * trap configuration and masking being limited to global scope. Always
+ * assume single process conditions.
+ *
+ */
+
+#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110
+static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall)
+{
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+ int i;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
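
The stall helper above issues enough SPI_GDBG_WAVE_CNTL reads to cover the ~3500-cycle SPI drain window described in the comment, at roughly 32 cycles per read. A stand-alone sketch of that arithmetic, using only the figures quoted in the comment:

	#include <stdio.h>

	int main(void)
	{
		const unsigned int drain_cycles = 3500;		/* SPI drain window (from the comment) */
		const unsigned int cycles_per_read = 32;	/* cost of one SPI_GDBG_WAVE_CNTL read */
		/* Round up so the reads fully cover the drain window. */
		unsigned int reads = (drain_cycles + cycles_per_read - 1) / cycles_per_read;

		printf("reads required: %u\n", reads);	/* 110, the DRAIN_LATENCY constant */
		return 0;
	}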
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ /* Assume GFX off is disabled for the debug session if RLC restore is not supported. */
+ if (restore_dbg_registers) {
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, 1 << vmid);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
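
The EXCP_EN update above changes only the bits named in trap_mask_request and keeps whatever was previously programmed everywhere else. That is the usual (new & mask) | (old & ~mask) merge, sketched below with arbitrary example values:

	#include <stdint.h>
	#include <stdio.h>

	/* Apply 'requested' bits from 'new_bits' on top of 'prev', leaving all
	 * non-requested bits of 'prev' untouched -- the same merge used for
	 * EXCP_EN in the trap-override function above. */
	static uint32_t merge_under_mask(uint32_t prev, uint32_t new_bits,
					 uint32_t requested)
	{
		return (new_bits & requested) | (prev & ~requested);
	}

	int main(void)
	{
		uint32_t prev = 0x0000f00f;	/* previously programmed trap mask */
		uint32_t new_bits = 0x00000550;	/* caller's desired values */
		uint32_t request = 0x00000ff0;	/* only these bits may change */

		printf("0x%08x\n", merge_under_mask(prev, new_bits, request));
		/* prints 0x0000f55f: bits 4-11 taken from new_bits, the rest kept */
		return 0;
	}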
+
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the
+ * same values.
+ */
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t tcp_watch_address_cntl;
+ uint32_t sq_watch_address_cntl;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ tcp_watch_address_cntl = 0;
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ sq_watch_address_cntl = 0;
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ /* Program {TCP,SQ}_WATCH?_ADDR* */
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+#undef TCP_WATCH_STRIDE
+#undef SQ_WATCH_STRIDE
+
+
+/* kgd_gfx_v10_get_iq_wait_times: Returns the current mmCP_IQ_WAIT_TIME2 value.
+ * The CP_IQ_WAIT_TIME1/2 wait counts are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Queue Sleep.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Dequeue Retry.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+{
+ *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
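
build_dequeue_wait_counts_packet_info() above starts from a previously read CP_IQ_WAIT_TIME2 value and rewrites only the SCH_WAVE and QUE_SLEEP sub-fields, and only when non-zero values are supplied. The sketch below mirrors that read-modify-write with an invented field layout (two 4-bit fields at bits 0 and 4); the real shifts and masks come from the register headers and are not reproduced here.

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical sub-field layout used only for this example. */
	#define EX_SCH_WAVE_SHIFT 0
	#define EX_SCH_WAVE_MASK  (0xfu << EX_SCH_WAVE_SHIFT)
	#define EX_QUE_SLEEP_SHIFT 4
	#define EX_QUE_SLEEP_MASK  (0xfu << EX_QUE_SLEEP_SHIFT)

	static uint32_t set_field(uint32_t reg, uint32_t mask, unsigned int shift,
				  uint32_t val)
	{
		return (reg & ~mask) | ((val << shift) & mask);
	}

	/* Start from the saved wait_times value and override only the requested
	 * sub-fields, as the packet-info builder does. */
	static uint32_t build_reg_data(uint32_t wait_times, uint32_t sch_wave,
				       uint32_t que_sleep)
	{
		uint32_t reg_data = wait_times;

		if (sch_wave)
			reg_data = set_field(reg_data, EX_SCH_WAVE_MASK,
					     EX_SCH_WAVE_SHIFT, sch_wave);
		if (que_sleep)
			reg_data = set_field(reg_data, EX_QUE_SLEEP_MASK,
					     EX_QUE_SLEEP_SHIFT, que_sleep);
		return reg_data;
	}

	int main(void)
	{
		/* que_sleep == 0, so the saved QUE_SLEEP field is preserved. */
		printf("0x%08x\n", build_reg_data(0x00000035, 0x7, 0));	/* 0x00000037 */
		return 0;
	}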
+
+static void program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+ lower_32_bits(tba_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+ upper_32_bits(tba_addr >> 8) |
+ (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+ lower_32_bits(tma_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+ upper_32_bits(tma_addr >> 8));
+
+ unlock_srbm(adev);
+}
+
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ return 0;
+}
+
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ return 0;
+}
+
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hiq_mqd_load = kgd_hiq_mqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .wave_control_execute = kgd_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
+ .program_trap_handler_settings = program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
new file mode 100644
index 000000000000..a4c607c88178
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst);
+uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst);
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
+uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst);
+uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst,
+ unsigned int utimeout);
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
new file mode 100644
index 000000000000..f2278a0937ff
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -0,0 +1,687 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/mmu_context.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_gfx_v10.h"
+#include "gc/gc_10_3_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
+#include "oss/osssys_5_0_0_offset.h"
+#include "oss/osssys_5_0_0_sh_mask.h"
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v10_structs.h"
+#include "nv.h"
+#include "nvd.h"
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ /* APE1 no longer exists since GFX9 */
+
+ unlock_srbm(adev);
+}
+
+/* ATC is defeatured on Sienna_Cichlid */
+static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
+ vmid, pasid);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, value);
+
+ return 0;
+}
+
+static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ fallthrough;
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 2:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA2_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 3:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA3_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
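
get_sdma_rlc_reg_offset() above computes a flat register offset as a per-engine base plus a per-queue stride. A self-contained sketch of that addressing with invented base offsets and stride (the real values come from SOC15_REG_OFFSET() and the RLC register spacing):

	#include <stdint.h>
	#include <stdio.h>

	/* Invented register map, for illustration only. */
	static const uint32_t example_engine_base[4] = {
		0x1000, 0x1800, 0x2000, 0x2800	/* SDMA0..SDMA3 RLC0 blocks */
	};
	#define EXAMPLE_QUEUE_STRIDE 0x80	/* RLC1_RB_CNTL - RLC0_RB_CNTL */

	static uint32_t example_sdma_rlc_reg_offset(unsigned int engine_id,
						    unsigned int queue_id)
	{
		if (engine_id >= 4) {
			fprintf(stderr, "invalid engine %u, using 0\n", engine_id);
			engine_id = 0;	/* mirrors the fallthrough to engine 0 above */
		}
		return example_engine_base[engine_id] +
		       queue_id * EXAMPLE_QUEUE_STRIDE;
	}

	int main(void)
	{
		/* SDMA2, queue 3 -> 0x2000 + 3 * 0x80 = 0x2180 */
		printf("0x%x\n", example_sdma_rlc_reg_offset(2, 3));
		return 0;
	}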
+
+static inline struct v10_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v10_compute_mqd *)mqd;
+}
+
+static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v10_sdma_mqd *)mqd;
+}
+
+static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct v10_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0 */
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32_SOC15_IP(GC, reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uint64_t)wptr));
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, data);
+
+ release_queue(adev);
+
+ return 0;
+}
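
The wptr block in the hqd_load function above reconstructs a 64-bit write pointer whose queue offset equals the saved 32-bit read pointer's offset, borrowing the wrap count from the saved WPTR. A stand-alone version of that arithmetic with a hypothetical 4 KiB ring:

	#include <stdint.h>
	#include <stdio.h>

	/* Same arithmetic as the guessed_wptr block in the hqd_load function above.
	 * queue_size must be a power of two (it is 2 << QUEUE_SIZE in the MQD). */
	static uint64_t guess_wptr(uint32_t rptr, uint32_t wptr_lo, uint32_t wptr_hi,
				   uint32_t queue_size)
	{
		uint64_t guessed = rptr & (queue_size - 1);

		if ((wptr_lo & (queue_size - 1)) < guessed)
			guessed += queue_size;	/* the WPTR wrapped, add the queue size */
		guessed += wptr_lo & ~(queue_size - 1);
		guessed += (uint64_t)wptr_hi << 32;

		return guessed;
	}

	int main(void)
	{
		/* Hypothetical 4 KiB ring: RPTR offset 0xff0, saved WPTR 0x1008. */
		printf("0x%llx\n",
		       (unsigned long long)guess_wptr(0x0ff0, 0x1008, 0, 0x1000));
		/* prints 0x2ff0: RPTR's offset, one wrap beyond the saved WPTR base */
		return 0;
	}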
+
+static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ struct v10_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
+
+ return r;
+}
+
+static int hqd_dump_v10_3(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32_SOC15_IP(GC, addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int hqd_sdma_load_v10_3(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+12)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
+ void *mqd)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v10_compute_mqd *m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue pipe %d queue %d preemption failed\n",
+ pipe_id, queue_id);
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int wave_control_execute_v10_3(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ /* SDMA is on gfxhub as well for Navi1* series */
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
+ lower_32_bits(tba_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
+ upper_32_bits(tba_addr >> 8) |
+ (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
+ lower_32_bits(tma_addr >> 8));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
+ upper_32_bits(tma_addr >> 8));
+
+ unlock_srbm(adev);
+}
+
+const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
+ .program_sh_mem_settings = program_sh_mem_settings_v10_3,
+ .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3,
+ .init_interrupts = init_interrupts_v10_3,
+ .hqd_load = hqd_load_v10_3,
+ .hiq_mqd_load = hiq_mqd_load_v10_3,
+ .hqd_sdma_load = hqd_sdma_load_v10_3,
+ .hqd_dump = hqd_dump_v10_3,
+ .hqd_sdma_dump = hqd_sdma_dump_v10_3,
+ .hqd_is_occupied = hqd_is_occupied_v10_3,
+ .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3,
+ .hqd_destroy = hqd_destroy_v10_3,
+ .hqd_sdma_destroy = hqd_sdma_destroy_v10_3,
+ .wave_control_execute = wave_control_execute_v10_3,
+ .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
+ .program_trap_handler_settings = program_trap_handler_settings_v10_3,
+ .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
+ .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v10_set_address_watch,
+ .clear_address_watch = kgd_gfx_v10_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
new file mode 100644
index 000000000000..aaccf0b9947d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/mmu_context.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "oss/osssys_6_0_0_offset.h"
+#include "oss/osssys_6_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "soc15d.h"
+#include "v11_structs.h"
+#include "soc21.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static uint64_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSH_MEM_BASES), sh_mem_bases);
+
+ unlock_srbm(adev);
+}
+
+static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("mapping vmid %d -> pasid %d in IH block for GFX client\n",
+ vmid, pasid);
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, value);
+
+ return 0;
+}
+
+static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ default:
+ BUG();
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v11_compute_mqd *)mqd;
+}
+
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v11_sdma_mqd *)mqd;
+}
+
+static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm, uint32_t inst)
+{
+ struct v11_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0 */
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__,
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
+ (uint32_t)get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
+ struct v11_compute_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[0].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[0].ring_lock);
+ release_queue(adev);
+
+ return r;
+}
+
+static int hqd_dump_v11(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
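
hqd_dump above hands its snapshot back through a pointer to an array of [register, value] pairs, which is why the parameter type is uint32_t (**dump)[2]. The sketch below reproduces the allocation and bounds-checked fill in isolation, with a fake read_reg() in place of the MMIO read:

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define EXAMPLE_N_REGS 4

	/* Fake register read used only for this example. */
	static uint32_t read_reg(uint32_t addr)
	{
		return addr ^ 0xdeadbeef;
	}

	/* Allocate and fill an array of [dword address, value] pairs, mirroring
	 * the DUMP_REG() pattern: stop (and warn) rather than overflow. */
	static int dump_regs(uint32_t first, uint32_t last,
			     uint32_t (**dump)[2], uint32_t *n_regs)
	{
		uint32_t i = 0, reg;

		*dump = malloc(EXAMPLE_N_REGS * sizeof(**dump));
		if (!*dump)
			return -1;	/* -ENOMEM in the driver */

		for (reg = first; reg <= last; reg++) {
			if (i >= EXAMPLE_N_REGS) {
				fprintf(stderr, "dump table too small\n");
				break;
			}
			(*dump)[i][0] = reg << 2;	/* byte address */
			(*dump)[i++][1] = read_reg(reg);
		}
		*n_regs = i;
		return 0;
	}

	int main(void)
	{
		uint32_t (*dump)[2];
		uint32_t n, i;

		if (dump_regs(0x100, 0x103, &dump, &n))
			return 1;
		for (i = 0; i < n; i++)
			printf("0x%08x = 0x%08x\n", dump[i][0], dump[i][1]);
		free(dump);
		return 0;
	}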
+
+static int hqd_sdma_load_v11(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+ if (data & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_QUEUE0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (7+11+1+12+12)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = regSDMA0_QUEUE0_RB_CNTL;
+ reg <= regSDMA0_QUEUE0_RB_WPTR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_RB_RPTR_ADDR_HI;
+ reg <= regSDMA0_QUEUE0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_DOORBELL_LOG;
+ reg <= regSDMA0_QUEUE0_DOORBELL_LOG; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_DOORBELL_OFFSET;
+ reg <= regSDMA0_QUEUE0_RB_PREEMPT; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = regSDMA0_QUEUE0_MIDCMD_DATA0;
+ reg <= regSDMA0_QUEUE0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v11_compute_mqd *m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue pipe %d queue %d preemption failed\n",
+ pipe_id, queue_id);
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v11_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL);
+ temp = temp & ~SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_CONTEXT_STATUS);
+ if (temp & SDMA0_QUEUE0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_CNTL) |
+ SDMA0_QUEUE0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + regSDMA0_QUEUE0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static int wave_control_execute_v11(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+
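+ /* Restore GRBM_GFX_INDEX to broadcast mode so subsequent register
+ * accesses are no longer limited to the targeted instance.
+ */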
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* SDMA is on gfxhub as well for gfx11 adapters */
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+/*
+ * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
+ *
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging, i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+static uint32_t kgd_gfx_v11_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 4))
+ *trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
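+/*
+ * Translate the KFD_DBG_TRAP_MASK_* software bits into the EXCP_EN,
+ * TRAP_ON_START and TRAP_ON_END fields of SPI_GDBG_PER_VMID_CNTL.
+ */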
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
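+ /* The low seven bits of watch_address_mask are dropped by the shift;
+ * presumably the hardware compare granularity makes them redundant.
+ */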
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ return 0;
+}
+
+static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ return 0;
+}
+
+static uint32_t kgd_gfx_v11_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
+ .program_sh_mem_settings = program_sh_mem_settings_v11,
+ .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
+ .init_interrupts = init_interrupts_v11,
+ .hqd_load = hqd_load_v11,
+ .hiq_mqd_load = hiq_mqd_load_v11,
+ .hqd_sdma_load = hqd_sdma_load_v11,
+ .hqd_dump = hqd_dump_v11,
+ .hqd_sdma_dump = hqd_sdma_dump_v11,
+ .hqd_is_occupied = hqd_is_occupied_v11,
+ .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v11,
+ .hqd_destroy = hqd_destroy_v11,
+ .hqd_sdma_destroy = hqd_sdma_destroy_v11,
+ .wave_control_execute = wave_control_execute_v11,
+ .get_atc_vmid_pasid_mapping_info = NULL,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
+ .enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v11_set_address_watch,
+ .clear_address_watch = kgd_gfx_v11_clear_address_watch,
+ .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v11_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v11_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
new file mode 100644
index 000000000000..e0ceab400b2d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
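+ /* pipe_id is a flat compute pipe index; the compute MECs start at
+ * micro engine 1, hence the +1 when selecting the MEC.
+ */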
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static int init_interrupts_v12(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
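+ /* Enable time-stamp and opcode-error interrupts for this compute pipe. */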
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
+ break;
+ default:
+ BUG();
+ }
+
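+ /* Queues within an engine use a fixed register stride, so the per-queue
+ * offset is the QUEUE1/QUEUE0 register distance times the queue id.
+ */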
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
+
+static int hqd_dump_v12(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
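+ /* Each dump entry is a (byte offset, value) pair; the << 2 converts the
+ * dword register index into a byte offset.
+ */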
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int hqd_sdma_dump_v12(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+
+ const uint32_t first_reg = regSDMA0_QUEUE0_RB_CNTL;
+ const uint32_t last_reg = regSDMA0_QUEUE0_CONTEXT_STATUS;
+#undef HQD_N_REGS
+#define HQD_N_REGS (last_reg - first_reg + 1)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = first_reg;
+ reg <= last_reg; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int wave_control_execute_v12(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
+
+ return data;
+}
+
+static int kgd_gfx_v12_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
+ KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
+ trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
+ return -EPERM;
+
+ return 0;
+}
+
+static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
+{
+ uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
+ uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
+ uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
+ KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
+ KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_FP_OVERFLOW |
+ KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
+ KFD_DBG_TRAP_MASK_FP_INEXACT |
+ KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
+ KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
+ KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
+ uint32_t ret;
+
+ ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
+ ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
+
+ return ret;
+}
+
+static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
+{
+ uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
+
+ if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
+ ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
+
+ return ret;
+}
+
+/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
+static uint32_t kgd_gfx_v12_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev)
+{
+ uint32_t data = 0;
+
+ *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
+
+ data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
+ data = trap_mask_map_sw_to_hw(data);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
+
+ return data;
+}
+
+/* returns STALL_VMID or LAUNCH_MODE. */
+static uint32_t kgd_gfx_v12_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_stall_mode = wave_launch_mode == 4;
+
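+ /* Wave launch mode 4 appears to request stalling the whole VMID rather
+ * than a per-wave launch mode, so STALL_VMID is programmed instead of
+ * LAUNCH_MODE.
+ */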
+ if (is_stall_mode)
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, STALL_VMID,
+ 1);
+ else
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE,
+ wave_launch_mode);
+
+ return data;
+}
+
+#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
+static uint32_t kgd_gfx_v12_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 7);
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ return watch_address_cntl;
+}
+
+static uint32_t kgd_gfx_v12_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ return 0;
+}
+
+static uint32_t kgd_gfx_v12_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v12_kfd2kgd = {
+ .init_interrupts = init_interrupts_v12,
+ .hqd_dump = hqd_dump_v12,
+ .hqd_sdma_dump = hqd_sdma_dump_v12,
+ .wave_control_execute = wave_control_execute_v12,
+ .get_atc_vmid_pasid_mapping_info = NULL,
+ .enable_debug_trap = kgd_gfx_v12_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v12_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v12_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v12_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v12_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v12_set_address_watch,
+ .clear_address_watch = kgd_gfx_v12_clear_address_watch,
+ .hqd_sdma_get_doorbell = kgd_gfx_v12_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
new file mode 100644
index 000000000000..df77558e03ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -0,0 +1,589 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "cikd.h"
+#include "cik_sdma.h"
+#include "gfx_v7_0.h"
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+#include "cik_structs.h"
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
+enum {
+ MAX_TRAPID = 8, /* 3 bits in the bitfield. */
+ MAX_WATCH_ADDRESSES = 4
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+ mutex_lock(&adev->srbm_mutex);
+ WREG32(mmSRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ WREG32(mmSRBM_GFX_CNTL, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
+ WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+ WREG32(mmSH_MEM_BASES, sh_mem_bases);
+
+ unlock_srbm(adev);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished and the
+ * SW cleared it. So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
+
+ while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+ cpu_relax();
+ WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
+
+ return 0;
+}
+
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static inline uint32_t get_sdma_rlc_reg_offset(struct cik_sdma_rlc_registers *m)
+{
+ uint32_t retval;
+
+ retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+ m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+ m->sdma_engine_id, m->sdma_queue_id, retval);
+
+ return retval;
+}
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+ return (struct cik_mqd *)mqd;
+}
+
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+ return (struct cik_sdma_rlc_registers *)mqd;
+}
+
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct cik_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
+ bool valid_wptr = false;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Copy userspace write pointer value to register.
+ * Activate doorbell logic to monitor subsequent changes.
+ */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* read_user_wptr() may take the mm->mmap_lock.
+ * release srbm_mutex to avoid circular dependency between
+ * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
+ */
+ release_queue(adev);
+ valid_wptr = read_user_wptr(mm, wptr, wptr_val);
+ acquire_queue(adev, pipe_id, queue_id);
+ if (valid_wptr)
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(mmCP_HQD_ACTIVE, data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int kgd_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS (35+4)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct cik_sdma_rlc_registers *m;
+ unsigned long end_jiffies;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t data;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdma_rlc_rb_rptr);
+
+ if (read_user_wptr(mm, wptr, data))
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
+ else
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdma_rlc_rb_rptr);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ m->sdma_rlc_virtual_addr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdma_rlc_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdma_rlc_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdma_rlc_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+ queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32(mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(mmCP_HQD_PQ_BASE) &&
+ high == RREG32(mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t temp;
+ enum hqd_dequeue_request_type type;
+ unsigned long flags, end_jiffies;
+ int retry;
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ /* Workaround: If the IQ timer is active and the wait time is close to
+ * or equal to 0, dequeueing is not safe. Wait until either the wait
+ * time is larger or the timer is cleared. Also ensure that IQ_REQ_PEND
+ * is cleared before continuing and that wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out\n");
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct cik_sdma_rlc_registers *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdma_rlc_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32(mmSQ_CMD, sq_cmd);
+
+ /* Restore the GRBM_GFX_INDEX register */
+
+ data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK |
+ GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
+ GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK;
+
+ WREG32(mmGRBM_GFX_INDEX, data);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static void set_scratch_backing_va(struct amdgpu_device *adev,
+ uint64_t va, uint32_t vmid)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+ unlock_srbm(adev);
+}
+
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID\n");
+ return;
+ }
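+ /* The VM_CONTEXT8..15 registers correspond to VMIDs 8..15, hence the
+ * vmid - 8 offset into the register block.
+ */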
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
+ lower_32_bits(page_table_base));
+}
+
+/**
+ * read_vmid_from_vmfault_reg - read vmid from the VM fault status register
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Read the VMID field from VM_CONTEXT1_PROTECTION_FAULT_STATUS (CIK).
+ */
+static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev)
+{
+ uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+
+ return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+}
+
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v7_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .wave_control_execute = kgd_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
new file mode 100644
index 000000000000..e68c0fa8d751
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -0,0 +1,610 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gfx_v8_0.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_enum.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "vi_structs.h"
+#include "vid.h"
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
+static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
+
+ mutex_lock(&adev->srbm_mutex);
+ WREG32(mmSRBM_GFX_CNTL, value);
+}
+
+static void unlock_srbm(struct amdgpu_device *adev)
+{
+ WREG32(mmSRBM_GFX_CNTL, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, queue_id, 0);
+}
+
+static void release_queue(struct amdgpu_device *adev)
+{
+ unlock_srbm(adev);
+}
+
+static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+
+ WREG32(mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32(mmSH_MEM_APE1_BASE, sh_mem_ape1_base);
+ WREG32(mmSH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
+ WREG32(mmSH_MEM_BASES, sh_mem_bases);
+
+ unlock_srbm(adev);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
+
+ while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
+ cpu_relax();
+ WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
+
+ return 0;
+}
+
+static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(adev, mec, pipe, 0, 0);
+
+ WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(adev);
+
+ return 0;
+}
+
+static inline uint32_t get_sdma_rlc_reg_offset(struct vi_sdma_mqd *m)
+{
+ uint32_t retval;
+
+ retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+ m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n",
+ m->sdma_engine_id, m->sdma_queue_id, retval);
+
+ return retval;
+}
+
+static inline struct vi_mqd *get_mqd(void *mqd)
+{
+ return (struct vi_mqd *)mqd;
+}
+
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct vi_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
+{
+ struct vi_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, wptr_val, data;
+ bool valid_wptr = false;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ /* The HIQ is set up during driver init with vmid set to 0 */
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(mmRLC_CP_SCHEDULERS);
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(mmRLC_CP_SCHEDULERS, value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+ * This is safe since EOP RPTR==WPTR for any inactive HQD
+ * on ASICs that do not support context-save.
+ * EOP writes/reads can start anywhere in the ring.
+ */
+ if (adev->asic_type != CHIP_TONGA) {
+ WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+ WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+ WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+ }
+
+ for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+ /* Copy userspace write pointer value to register.
+ * Activate doorbell logic to monitor subsequent changes.
+ */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ /* read_user_wptr() may take the mm->mmap_lock.
+ * release srbm_mutex to avoid circular dependency between
+ * srbm_mutex->mmap_lock->reservation_ww_class_mutex->srbm_mutex.
+ */
+ release_queue(adev);
+ valid_wptr = read_user_wptr(mm, wptr, wptr_val);
+ acquire_queue(adev, pipe_id, queue_id);
+ if (valid_wptr)
+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(mmCP_HQD_ACTIVE, data);
+
+ release_queue(adev);
+
+ return 0;
+}
+
+static int kgd_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS (54+4)
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
+ DUMP_REG(reg);
+
+ release_queue(adev);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct vi_sdma_mqd *m;
+ unsigned long end_jiffies;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t data;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+
+ if (read_user_wptr(mm, wptr, data))
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR, data);
+ else
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_VIRTUAL_ADDR,
+ m->sdmax_rlcx_virtual_addr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+ queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4+2+3+7)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
+ reg++)
+ DUMP_REG(sdma_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(adev, pipe_id, queue_id);
+ act = RREG32(mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(mmCP_HQD_PQ_BASE) &&
+ high == RREG32(mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ release_queue(adev);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct vi_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t temp;
+ enum hqd_dequeue_request_type type;
+ unsigned long flags, end_jiffies;
+ int retry;
+ struct vi_mqd *m = get_mqd(mqd);
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ acquire_queue(adev, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ /* Workaround: If the IQ timer is active and the wait time is close to
+ * or equal to 0, dequeueing is not safe. Wait until either the wait
+ * time is larger or the timer is cleared. Also ensure that IQ_REQ_PEND
+ * is cleared before continuing and that wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+
+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(adev);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(adev);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct vi_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(m);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static int kgd_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32(mmSQ_CMD, sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static void set_scratch_backing_va(struct amdgpu_device *adev,
+ uint64_t va, uint32_t vmid)
+{
+ lock_srbm(adev, 0, 0, 0, vmid);
+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+ unlock_srbm(adev);
+}
+
+static void set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID\n");
+ return;
+ }
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
+ lower_32_bits(page_table_base));
+}
+
+static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v8_kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .wave_control_execute = kgd_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ get_atc_vmid_pasid_mapping_info,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
new file mode 100644
index 000000000000..088d09cc7a72
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -0,0 +1,1264 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "gfx_v9_0.h"
+#include "amdgpu_amdkfd_gfx_v9.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid, uint32_t inst)
+{
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid, GET_INST(GC, inst));
+}
+
+static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst)
+{
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0, inst);
+}
+
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id;
+
+ return 1ull << bit;
+}
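+
+/*
+ * Worked example for kgd_gfx_v9_get_queue_mask() above (hypothetical values):
+ * with num_queue_per_pipe == 8, pipe_id == 1 and queue_id == 2 select bit
+ * 1 * 8 + 2 == 10, so the returned mask is 1ull << 10 == 0x400.
+ */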
+
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst)
+{
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+}
+
+void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst)
+{
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+}
+
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst)
+{
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ /*
+ * need to do this twice, once for gfx and once for mmhub
+ * for ATC add 16 to VMID for mmhub, for IH different registers.
+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
+ */
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << (vmid + 16))))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << (vmid + 16));
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ return 0;
+}
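+
+/*
+ * Illustrative example for kgd_gfx_v9_set_pasid_vmid_mapping() above, with an
+ * assumed vmid == 8: the GFX path writes ATC_VMID0_PASID_MAPPING + 8 (the
+ * VMID 8 slot) and polls bit 8 of ATC_VMID_PASID_MAPPING_UPDATE_STATUS, while
+ * the MMHUB path writes ATC_VMID16_PASID_MAPPING + 8 (the VMID 24 slot) and
+ * polls bit 24 (vmid + 16). Both IH LUTs (IH_VMID_0_LUT and IH_VMID_0_LUT_MM)
+ * receive the same mapping at index 8.
+ */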
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst)
+{
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0, inst);
+
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCPC_INT_CNTL,
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+
+ return 0;
+}
+
+static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t sdma_engine_reg_base = 0;
+ uint32_t sdma_rlc_reg_offset;
+
+ switch (engine_id) {
+ default:
+ dev_warn(adev->dev,
+ "Invalid sdma engine id (%d), using engine id 0\n",
+ engine_id);
+ fallthrough;
+ case 0:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ case 1:
+ sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+ break;
+ }
+
+ sdma_rlc_reg_offset = sdma_engine_reg_base
+ + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
+ queue_id, sdma_rlc_reg_offset);
+
+ return sdma_rlc_reg_offset;
+}
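+
+/*
+ * Worked example for get_sdma_rlc_reg_offset() above (hypothetical values):
+ * engine_id == 1 and queue_id == 2 yield SDMA1's RLC base relative to
+ * mmSDMA0_RLC0_RB_CNTL plus two per-queue strides of
+ * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL), i.e. the register block of
+ * SDMA1 RLC2.
+ */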
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr, uint32_t wptr_shift,
+ uint32_t wptr_mask, struct mm_struct *mm,
+ uint32_t inst)
+{
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO,
+ lower_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI,
+ upper_32_bits(guessed_wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ lower_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ upper_32_bits((uintptr_t)wptr));
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1,
+ (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR,
+ REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return 0;
+}
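+
+/*
+ * Worked example of the WPTR guess in kgd_gfx_v9_hqd_load() above, using
+ * hypothetical MQD values: queue_size == 0x1000, cp_hqd_pq_rptr == 0xf00,
+ * cp_hqd_pq_wptr_lo == 0x12010, cp_hqd_pq_wptr_hi == 0. The in-queue offset
+ * of the saved WPTR (0x010) is behind the RPTR offset (0xf00), so one queue
+ * size is added: 0xf00 + 0x1000 + 0x12000 == 0x13f00, whose low bits match
+ * the RPTR while the upper bits come from the saved WPTR.
+ */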
+
+int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst)
+{
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring;
+ struct v9_mqd *m;
+ uint32_t mec, pipe;
+ int r;
+
+ m = get_mqd(mqd);
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, 7);
+ if (r) {
+ pr_err("Failed to alloc KIQ (%d).\n", r);
+ goto out_unlock;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(m->cp_hqd_vmid) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(queue_id) |
+ PACKET3_MAP_QUEUES_PIPE(pipe) |
+ PACKET3_MAP_QUEUES_ME((mec - 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(1) | /* engine_sel: hiq */
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(doorbell_off));
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_mqd_base_addr_hi);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_lo);
+ amdgpu_ring_write(kiq_ring, m->cp_hqd_pq_wptr_poll_addr_hi);
+ amdgpu_ring_commit(kiq_ring);
+
+out_unlock:
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return r;
+}
+
+int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
+{
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
+ engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_rlc_reg_offset + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ act = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE) &&
+ high == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI))
+ retval = true;
+ }
+ kgd_gfx_v9_release_queue(adev, inst);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst)
+{
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v9_mqd *m = get_mqd(mqd);
+
+ if (amdgpu_in_reset(adev))
+ return -EIO;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15_RLC(GC, GET_INST(GC, inst), RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
+ type = SAVE_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ kgd_gfx_v9_release_queue(adev, inst);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ kgd_gfx_v9_release_queue(adev, inst);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
+ unsigned int utimeout)
+{
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_rlc_reg_offset;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("SDMA RLC not idle in %s\n", __func__);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_rlc_reg_offset + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid)
+{
+ uint32_t value;
+
+ value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+ return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst)
+{
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, gfx_index_val);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * GFX9 helper for wave launch stall requirements on debug trap setting.
+ *
+ * vmid:
+ * Target VMID to stall/unstall.
+ *
+ * stall:
+ * 0-unstall wave launch (enable), 1-stall wave launch (disable).
+ * After wavefront launch has been stalled, allocated waves must drain from
+ * SPI in order for debug trap settings to take effect on those waves.
+ * This is roughly a ~96 clock cycle wait on SPI where a read on
+ * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
+ * KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
+ *
+ * NOTE: We can afford to clear the entire STALL_VMID field on unstall
+ * because GFX9.4.1 cannot support multi-process debugging due to trap
+ * configuration and masking being limited to global scope. Always assume
+ * single process conditions.
+ */
+#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY 3
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall)
+{
+ int i;
+ uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
+ stall ? 1 << vmid : 0);
+ else
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
+ stall ? 1 : 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
+
+ if (!stall)
+ return;
+
+ for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+}
+
+/*
+ * restore_dbg_registers is ignored here but is a general interface requirement
+ * for devices that support GFXOFF and where the RLC save/restore list
+ * does not support hw registers for debugging i.e. the driver has to manually
+ * initialize the debug mode registers after it has disabled GFX off during the
+ * debug session.
+ */
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+/*
+ * keep_trap_enabled is ignored here but is a general interface requirement
+ * for devices that support multi-process debugging where the performance
+ * overhead from trap temporary setup needs to be bypassed when the debug
+ * session has ended.
+ */
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid)
+{
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported)
+{
+ *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
+
+ /* The SPI_GDBG_TRAP_MASK register is global and affects all
+ * processes. Only allow OR-ing the address-watch bit, since
+ * this only affects processes under the debugger. Other bits
+ * should stay 0 to avoid the debugger interfering with other
+ * processes.
+ */
+ if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
+ return -EINVAL;
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_cntl_prev)
+{
+ uint32_t data, wave_cntl_prev;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
+ *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
+
+ trap_mask_bits = (trap_mask_bits & trap_mask_request) |
+ (*trap_mask_prev & ~trap_mask_request);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
+
+ /* We need to preserve wave launch mode stall settings. */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid)
+{
+ uint32_t data = 0;
+ bool is_mode_set = !!wave_launch_mode;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
+
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ VMID_MASK, is_mode_set ? 1 << vmid : 0);
+ data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
+ MODE, is_mode_set ? wave_launch_mode : 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
+
+ kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
+
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst)
+{
+ uint32_t watch_address_high;
+ uint32_t watch_address_low;
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ watch_address_low = lower_32_bits(watch_address);
+ watch_address_high = upper_32_bits(watch_address) & 0xffff;
+
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
+ /* Turning off this watch point until we set all the registers */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 0);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_high);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_low);
+
+ /* Enable the watch point */
+ watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ TCP_WATCH0_CNTL,
+ VALID,
+ 1);
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
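+
+/*
+ * Illustrative note for kgd_gfx_v9_set_address_watch() above: each watch
+ * point occupies one TCP_WATCHn_{CNTL,ADDR_H,ADDR_L} slot, so e.g.
+ * watch_id == 2 lands on mmTCP_WATCH0_* + 2 * TCP_WATCH_STRIDE, i.e. the
+ * TCP_WATCH2_* registers.
+ */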
+
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id)
+{
+ uint32_t watch_address_cntl;
+
+ watch_address_cntl = 0;
+
+ WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
+ (watch_id * TCP_WATCH_STRIDE)),
+ watch_address_cntl);
+
+ return 0;
+}
+
+/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time -- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst)
+{
+ *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst),
+ mmCP_IQ_WAIT_TIME2);
+}
+
+void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base)
+{
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ adev->mmhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+
+ adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
+static void lock_spi_csq_mutexes(struct amdgpu_device *adev)
+{
+ mutex_lock(&adev->srbm_mutex);
+ mutex_lock(&adev->grbm_idx_mutex);
+}
+
+static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
+{
+ mutex_unlock(&adev->grbm_idx_mutex);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+/**
+ * get_wave_count: Read device registers to get number of waves in flight for
+ * a particular queue. The method also records the doorbell offset of the
+ * queue in @queue_cnt.
+ *
+ * @adev: Handle of device whose registers are to be read
+ * @queue_idx: Index of queue in the queue-map bit-field
+ * @queue_cnt: Stores the wave count and doorbell offset for an active queue
+ * @inst: xcc's instance number on a multi-XCC setup
+ */
+static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
+ struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
+{
+ int pipe_idx;
+ int queue_slot;
+ unsigned int reg_val;
+ unsigned int wave_cnt;
+ /*
+ * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
+ * parameters to read out waves in flight. Record the queue's
+ * doorbell offset if there are non-zero waves in flight.
+ */
+ pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
+ queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
+ soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
+ reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
+ mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
+ wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
+ if (wave_cnt != 0) {
+ queue_cnt->wave_cnt += wave_cnt;
+ queue_cnt->doorbell_off =
+ (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ }
+}
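+
+/*
+ * Worked example for get_wave_count() above (hypothetical values): with
+ * num_queue_per_pipe == 8, queue_idx == 13 selects pipe_idx == 1 and
+ * queue_slot == 5, so GRBM is programmed for MEC1 pipe 1 queue 5 before the
+ * SPI_CSQ_WF_ACTIVE_COUNT_0 + 5 read.
+ */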
+
+/**
+ * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
+ * shader engine and records, per queue, the number of waves in flight along
+ * with the queue's doorbell offset so the caller can attribute them to a
+ * process. The process could have ZERO or more queues running and submitting
+ * waves to compute units.
+ *
+ * @adev: Handle of device from which to get number of waves in flight
+ * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
+ * for comparison later.
+ * @max_waves_per_cu: Output parameter updated with maximum number of waves
+ * possible per Compute Unit
+ * @inst: xcc's instance number on a multi-XCC setup
+ *
+ * Note: It's possible that the device has too many queues (oversubscription)
+ * in which case a VMID could be remapped to a different PASID. This could lead
+ * to an inaccurate wave count. Following is a high-level sequence:
+ * Time T1: vmid = getVmid(); vmid is associated with Pasid P1
+ * Time T2: pasid = getPasid(vmid); vmid is now associated with Pasid P2
+ * In the sequence above, the wave count obtained at time T1 will be
+ * incorrectly lost from, or added to, the total wave count.
+ *
+ * The registers that provide the waves in flight are:
+ *
+ * SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. The bit is ON if a
+ * queue is slotted, OFF if there is no queue. A process could have ZERO or
+ * more queues slotted and submitting waves to be run on compute units. Even
+ * when there is a queue it is possible there could be zero wave fronts, this
+ * can happen when queue is waiting on top-of-pipe events - e.g. waitRegMem
+ * command
+ *
+ * For each bit that is ON from above:
+ *
+ * Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the
+ * number of waves that are in flight for the queue at specified index. The
+ * index ranges from 0 to 7.
+ *
+ * If non-zero waves are in flight, store the corresponding doorbell offset
+ * of the queue, along with the wave count.
+ *
+ * Determine if the queue belongs to the process by comparing the doorbell
+ * offset against the process's queues. If it matches, aggregate the wave
+ * count for the process.
+ *
+ * Reading registers referenced above involves programming GRBM appropriately
+ */
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst)
+{
+ int qidx;
+ int se_idx;
+ int se_cnt;
+ int queue_map;
+ int max_queue_cnt;
+ DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);
+
+ lock_spi_csq_mutexes(adev);
+ soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));
+
+ /*
+ * Iterate through the shader engines and arrays of the device
+ * to get number of waves in flight
+ */
+ bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap,
+ AMDGPU_MAX_QUEUES);
+ max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+ se_cnt = adev->gfx.config.max_shader_engines;
+ for (se_idx = 0; se_idx < se_cnt; se_idx++) {
+ amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
+ queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);
+
+ /*
+ * Assumption: the queue map encodes the following scheme: four
+ * pipes per micro-engine, with each pipe mapping eight queues.
+ * This holds for GFX9 devices and must be verified for newer
+ * device families.
+ */
+ for (qidx = 0; qidx < max_queue_cnt; qidx++) {
+ /* Skip queues that are not associated with
+ * compute functions
+ */
+ if (!test_bit(qidx, cp_queue_bitmap))
+ continue;
+
+ if (!(queue_map & (1 << qidx)))
+ continue;
+
+ /* Get number of waves in flight and aggregate them */
+ get_wave_count(adev, qidx, &cu_occupancy[qidx],
+ inst);
+ }
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
+ soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
+ unlock_spi_csq_mutexes(adev);
+
+ /* Update the output parameters and return */
+ *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
+ adev->gfx.cu_info.max_waves_per_simd;
+}
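+
+/*
+ * Illustrative example for kgd_gfx_v9_get_cu_occupancy() above: with the
+ * typical GFX9 values of 4 SIMDs per CU and 10 waves per SIMD (assumed
+ * here), *max_waves_per_cu is reported as 40.
+ */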
+
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ *reg_data = wait_times;
+
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
+
+ *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
+void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
+{
+ kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst);
+
+ /*
+ * Program TBA registers
+ */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO,
+ lower_32_bits(tba_addr >> 8));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI,
+ upper_32_bits(tba_addr >> 8));
+
+ /*
+ * Program TMA registers
+ */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_LO,
+ lower_32_bits(tma_addr >> 8));
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_HI,
+ upper_32_bits(tma_addr >> 8));
+
+ kgd_gfx_v9_unlock_srbm(adev, inst);
+}
+
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst)
+{
+ uint32_t low, high;
+ uint64_t queue_addr = 0;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+ if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+ goto unlock_out;
+
+ low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+ high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+ /* only concerned with user queues. */
+ if (!high)
+ goto unlock_out;
+
+ queue_addr = (((queue_addr | high) << 32) | low) << 8;
+
+unlock_out:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return queue_addr;
+}
+
+/* assume queue acquired */
+static int kgd_gfx_v9_hqd_dequeue_wait(struct amdgpu_device *adev, uint32_t inst,
+ unsigned int utimeout)
+{
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ while (true) {
+ uint32_t temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ return 0;
+
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+
+ usleep_range(500, 1000);
+ }
+}
+
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t inst, unsigned int utimeout)
+{
+ uint32_t low, high, pipe_reset_data = 0;
+ uint64_t queue_addr = 0;
+
+ kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+ amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+ if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+ goto unlock_out;
+
+ low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+ high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+ /* only concerned with user queues. */
+ if (!high)
+ goto unlock_out;
+
+ queue_addr = (((queue_addr | high) << 32) | low) << 8;
+
+ pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n",
+ inst, pipe_id, queue_id);
+
+ /* assume a previously issued dequeue request will take effect after reset */
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ if (!kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
+ goto unlock_out;
+
+ pr_debug("Attempting pipe reset on XCC %i pipe id %i\n", inst, pipe_id);
+
+ pipe_reset_data = REG_SET_FIELD(pipe_reset_data, CP_MEC_CNTL, MEC_ME1_PIPE0_RESET, 1);
+ pipe_reset_data = pipe_reset_data << pipe_id;
+
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, pipe_reset_data);
+ WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_MEC_CNTL, 0);
+
+ if (kgd_gfx_v9_hqd_dequeue_wait(adev, inst, utimeout))
+ queue_addr = 0;
+
+unlock_out:
+ pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
+ inst, pipe_id, queue_id, !!queue_addr ? "succeeded!" : "failed!");
+ amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
+ kgd_gfx_v9_release_queue(adev, inst);
+
+ return queue_addr;
+}
+
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
+const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
+ .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_gfx_v9_init_interrupts,
+ .hqd_load = kgd_gfx_v9_hqd_load,
+ .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_gfx_v9_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_gfx_v9_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .wave_control_execute = kgd_gfx_v9_wave_control_execute,
+ .get_atc_vmid_pasid_mapping_info =
+ kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
+ .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+ .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
+ .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
+ .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
+ .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
+ .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
+ .set_address_watch = kgd_gfx_v9_set_address_watch,
+ .clear_address_watch = kgd_gfx_v9_clear_address_watch,
+ .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
+ .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
+ .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+ .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
new file mode 100644
index 000000000000..704452ca62f8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases, uint32_t inst);
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
+ unsigned int vmid, uint32_t inst);
+int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t inst);
+int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm, uint32_t inst);
+int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t doorbell_off, uint32_t inst);
+int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
+bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd, uint32_t inst);
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
+ uint8_t vmid, uint16_t *p_pasid);
+void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t page_table_base);
+void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
+ struct kfd_cu_occupancy *cu_occupancy,
+ int *max_waves_per_cu, uint32_t inst);
+void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+ uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
+ uint32_t inst);
+void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t inst);
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id);
+void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst);
+void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
+ uint32_t vmid,
+ bool stall);
+uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
+ bool restore_dbg_registers,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
+ bool keep_trap_enabled,
+ uint32_t vmid);
+int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
+ uint32_t trap_override,
+ uint32_t *trap_mask_supported);
+uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
+ uint8_t wave_launch_mode,
+ uint32_t vmid);
+uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint32_t trap_override,
+ uint32_t trap_mask_bits,
+ uint32_t trap_mask_request,
+ uint32_t *trap_mask_prev,
+ uint32_t kfd_dbg_trap_cntl_prev);
+uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
+ uint64_t watch_address,
+ uint32_t watch_address_mask,
+ uint32_t watch_id,
+ uint32_t watch_mode,
+ uint32_t debug_vmid,
+ uint32_t inst);
+uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
+ uint32_t watch_id);
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+ uint32_t *wait_times,
+ uint32_t inst);
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
+ uint32_t wait_times,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
+ uint32_t *reg_offset,
+ uint32_t *reg_data);
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst);
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+ uint32_t pipe_id,
+ uint32_t queue_id,
+ uint32_t inst,
+ unsigned int utimeout);
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
new file mode 100644
index 000000000000..b1c24c8fa686
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -0,0 +1,3252 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/dma-buf.h>
+#include <linux/list.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include <drm/drm_exec.h>
+
+#include "amdgpu_object.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
+#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
+#include "kfd_smi_events.h"
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29)
+
+/*
+ * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_AVAILABLITY_ALIGN (1 << 21)
+
+/* Impose limit on how much memory KFD can use */
+static struct {
+ uint64_t max_system_mem_limit;
+ uint64_t max_ttm_mem_limit;
+ int64_t system_mem_used;
+ int64_t ttm_mem_used;
+ spinlock_t mem_limit_lock;
+} kfd_mem_limit;
+
+static const char * const domain_bit_to_string[] = {
+ "CPU",
+ "GTT",
+ "VRAM",
+ "GDS",
+ "GWS",
+ "OA"
+};
+
+#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
+
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
+
+static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
+ struct kgd_mem *mem)
+{
+ struct kfd_mem_attachment *entry;
+
+ list_for_each_entry(entry, &mem->attachments, list)
+ if (entry->bo_va->base.vm == avm)
+ return true;
+
+ return false;
+}
+
+/**
+ * reuse_dmamap() - Check whether adev can share the original
+ * userptr BO
+ *
+ * If both adev and bo_adev are in direct mapping or
+ * in the same iommu group, they can share the original BO.
+ *
+ * @adev: Device that may or may not share the original BO
+ * @bo_adev: Device to which the allocated BO belongs
+ *
+ * Return: returns true if adev can share original userptr BO,
+ * false otherwise.
+ */
+static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
+{
+ return (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped) ||
+ (adev->dev->iommu_group == bo_adev->dev->iommu_group);
+}
+
+/* Set memory usage limits. Currently the limits are:
+ * System (TTM + userptr) memory - all but ~1/64th of system RAM, minus a reserve
+ * TTM memory - capped by ttm_tt_pages_limit()
+ */
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+ struct sysinfo si;
+ uint64_t mem;
+
+ if (kfd_mem_limit.max_system_mem_limit)
+ return;
+
+ si_meminfo(&si);
+ mem = si.totalram - si.totalhigh;
+ mem *= si.mem_unit;
+
+ spin_lock_init(&kfd_mem_limit.mem_limit_lock);
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6);
+ if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT)
+ kfd_mem_limit.max_system_mem_limit >>= 1;
+ else
+ kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT;
+
+ kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT;
+ pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
+ (kfd_mem_limit.max_system_mem_limit >> 20),
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+}
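+
+/*
+ * Worked example for amdgpu_amdkfd_gpuvm_init_mem_limits() above, assuming a
+ * hypothetical 64 GiB of low system RAM: 64 GiB - (64 GiB >> 6) = 63 GiB,
+ * which exceeds 2 * AMDGPU_RESERVE_MEM_LIMIT (3 GiB), so the 1.5 GiB reserve
+ * is subtracted, giving a system memory limit of about 61.5 GiB. The TTM
+ * limit comes directly from ttm_tt_pages_limit().
+ */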
+
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
+{
+ kfd_mem_limit.system_mem_used += size;
+}
+
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
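+
+/*
+ * Worked example (hypothetical size): for 256 GiB of memory,
+ * ESTIMATE_PT_SIZE gives 256 GiB >> 14 = 16 MiB reserved for page tables,
+ * unless AMDGPU_VM_RESERVED_VRAM is larger.
+ */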
+
+/**
+ * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer.
+ *
+ * @adev: Device to which allocated BO belongs to
+ * @size: Size of buffer, in bytes, encapsulated by BO. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ * @xcp_id: Index used to look up the XCP in the XCP manager; each XCP is
+ * managed as one compute node in the driver for the application
+ *
+ * Return:
+ * -ENOMEM in case of error, 0 otherwise
+ */
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
+{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
+ size_t system_mem_needed, ttm_mem_needed, vram_needed;
+ int ret = 0;
+ uint64_t vram_size = 0;
+
+ system_mem_needed = 0;
+ ttm_mem_needed = 0;
+ vram_needed = 0;
+ if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ /*
+ * Conservatively round up the allocation requirement to 2 MB
+ * to avoid fragmentation caused by 4K allocations in the tail
+ * 2M BO chunk.
+ */
+ vram_needed = size;
+ /*
+ * For GFX 9.4.3, get the VRAM size from XCP structs
+ */
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ return -EINVAL;
+
+ vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
+ if (adev->apu_prefer_gtt) {
+ system_mem_needed = size;
+ ttm_mem_needed = size;
+ }
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ system_mem_needed = size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+ return -ENOMEM;
+ }
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+ if (kfd_mem_limit.system_mem_used + system_mem_needed >
+ kfd_mem_limit.max_system_mem_limit) {
+ pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+ if (!no_system_mem_limit) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
+
+ if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+ kfd_mem_limit.max_ttm_mem_limit) {
+ ret = -ENOMEM;
+ goto release;
+ }
+
+ /* If is_app_apu is false and apu_prefer_gtt is true, this is an APU with
+ * carve-out < GTT. In that case, the VRAM allocation will go to the GTT
+ * domain, so skip the VRAM check since the ttm_mem_limit check already
+ * covers this allocation.
+ */
+
+ if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
+ uint64_t vram_available =
+ vram_size - reserved_for_pt - reserved_for_ras -
+ atomic64_read(&adev->vram_pin_size);
+ if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
+ ret = -ENOMEM;
+ goto release;
+ }
+ }
+
+ /* Update memory accounting by decreasing available system
+ * memory, TTM memory and GPU memory as computed above
+ */
+ WARN_ONCE(vram_needed && !adev,
+ "adev reference can't be null when vram is used");
+ if (adev && xcp_id >= 0) {
+ adev->kfd.vram_used[xcp_id] += vram_needed;
+ adev->kfd.vram_used_aligned[xcp_id] +=
+ adev->apu_prefer_gtt ?
+ vram_needed :
+ ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+ }
+ kfd_mem_limit.system_mem_used += system_mem_needed;
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+ return ret;
+}
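+
+/*
+ * Summary of the accounting done in amdgpu_amdkfd_reserve_mem_limit() above:
+ * GTT allocations count against both the system and TTM limits; VRAM
+ * allocations count against the per-XCP VRAM budget (and additionally against
+ * the system and TTM limits on APUs that prefer GTT); USERPTR allocations
+ * count against the system limit only; DOORBELL and MMIO_REMAP allocations
+ * are not accounted.
+ */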
+
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag, int8_t xcp_id)
+{
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+ if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ WARN_ONCE(!adev,
+ "adev reference can't be null when alloc mem flags vram is set");
+ if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+ goto release;
+
+ if (adev) {
+ adev->kfd.vram_used[xcp_id] -= size;
+ if (adev->apu_prefer_gtt) {
+ adev->kfd.vram_used_aligned[xcp_id] -= size;
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
+ } else {
+ adev->kfd.vram_used_aligned[xcp_id] -=
+ ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ }
+ }
+ } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ kfd_mem_limit.system_mem_used -= size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+ goto release;
+ }
+ WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
+ "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
+ WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+ "KFD TTM memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "KFD system memory accounting unbalanced");
+
+release:
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
+void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ u32 alloc_flags = bo->kfd_bo->alloc_flags;
+ u64 size = amdgpu_bo_size(bo);
+
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
+ bo->xcp_id);
+
+ kfree(bo->kfd_bo);
+}
+
+/**
+ * create_dmamap_sg_bo() - Creates an amdgpu_bo object to reflect information
+ * about a USERPTR, DOORBELL or MMIO BO.
+ *
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ * in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+ struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+ struct drm_gem_object *gem_obj;
+ int ret;
+ uint64_t flags = 0;
+
+ ret = amdgpu_bo_reserve(mem->bo, false);
+ if (ret)
+ return ret;
+
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
+ flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED);
+
+ ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
+ AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
+ ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0);
+
+ amdgpu_bo_unreserve(mem->bo);
+
+ if (ret) {
+ pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
+ return -EINVAL;
+ }
+
+ *bo_out = gem_to_amdgpu_bo(gem_obj);
+ (*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+ return ret;
+}
+
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
+ * reservation object.
+ *
+ * @bo: [IN] Remove eviction fence(s) from this BO
+ * @ef: [IN] This eviction fence is removed if it
+ * is present in the shared list.
+ *
+ * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
+ */
+static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
+ struct amdgpu_amdkfd_fence *ef)
+{
+ struct dma_fence *replacement;
+
+ if (!ef)
+ return -EINVAL;
+
+ /* TODO: Instead of blocking beforehand, we should use the fence of the
+ * page table update and TLB flush here directly.
+ */
+ replacement = dma_fence_get_stub();
+ dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
+ replacement, DMA_RESV_USAGE_BOOKKEEP);
+ dma_fence_put(replacement);
+ return 0;
+}
+
+/**
+ * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences
+ * @bo: the BO where to remove the evictions fences from.
+ *
+ * This function should only be used on release, when all references to the BO
+ * have already been dropped. We remove the eviction fence from the private copy
+ * of the dma_resv object here since that is what is used during release to
+ * determine whether the BO is idle or not.
+ */
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
+{
+ struct dma_resv *resv = &bo->tbo.base._resv;
+ struct dma_fence *fence, *stub;
+ struct dma_resv_iter cursor;
+
+ dma_resv_assert_held(resv);
+
+ stub = dma_fence_get_stub();
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ if (!to_amdgpu_amdkfd_fence(fence))
+ continue;
+
+ dma_resv_replace_fences(resv, fence->context, stub,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ dma_fence_put(stub);
+}
+
+static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
+ bool wait)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ int ret;
+
+ if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
+ "Called with userptr BO"))
+ return -EINVAL;
+
+ /* bo has been pinned, no need to validate it */
+ if (bo->tbo.pin_count)
+ return 0;
+
+ amdgpu_bo_placement_from_domain(bo, domain);
+
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto validate_fail;
+ if (wait)
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+
+validate_fail:
+ return ret;
+}
+
+int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+ uint32_t domain,
+ struct dma_fence *fence)
+{
+ int ret = amdgpu_bo_reserve(bo, false);
+
+ if (ret)
+ return ret;
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+ if (ret)
+ goto unreserve_out;
+
+ ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+ if (ret)
+ goto unreserve_out;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+
+unreserve_out:
+ amdgpu_bo_unreserve(bo);
+
+ return ret;
+}
+
+static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
+{
+ return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
+}
+
+/* vm_validate_pt_pd_bos - Validate page table and directory BOs
+ *
+ * Page directories are not updated here because huge page handling
+ * during page table updates can invalidate page directory entries
+ * again. Page directories are only updated after updating page
+ * tables.
+ */
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
+{
+ struct amdgpu_bo *pd = vm->root.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+ int ret;
+
+ ret = amdgpu_vm_validate(adev, vm, ticket,
+ amdgpu_amdkfd_validate_vm_bo, NULL);
+ if (ret) {
+ pr_err("failed to validate PT BOs\n");
+ return ret;
+ }
+
+ vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+
+ return 0;
+}
+
+static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
+{
+ struct amdgpu_bo *pd = vm->root.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+ int ret;
+
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
+ if (ret)
+ return ret;
+
+ return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
+}
+
+static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct kgd_mem *mem)
+{
+ uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
+ AMDGPU_VM_MTYPE_DEFAULT;
+
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+ return mapping_flags;
+}
+
+/**
+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by the input parameters. The address used to build the table
+ * is assumed to already be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+ if (!sg)
+ return NULL;
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+ kfree(sg);
+ return NULL;
+ }
+ sg_dma_address(sg->sgl) = addr;
+ sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = size;
+#endif
+ return sg;
+}
+
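+/* kfd_mem_dmamap_userptr - DMA-map a userptr BO for access by another GPU
+ *
+ * Builds an sg_table from the pages already gathered for the original
+ * userptr BO, DMA-maps it with the attaching device and validates the
+ * attachment BO into the GTT domain so its page tables can reference
+ * the mapped addresses.
+ */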
+static int
+kfd_mem_dmamap_userptr(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ enum dma_data_direction direction =
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *src_ttm = mem->bo->tbo.ttm;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ int ret;
+
+ if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+ return -EINVAL;
+
+ ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
+ if (unlikely(!ttm->sg))
+ return -ENOMEM;
+
+ /* Same sequence as in amdgpu_ttm_tt_pin_userptr */
+ ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
+ ttm->num_pages, 0,
+ (u64)ttm->num_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (unlikely(ret))
+ goto free_sg;
+
+ ret = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+ if (unlikely(ret))
+ goto release_sg;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unmap_sg;
+
+ return 0;
+
+unmap_sg:
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+release_sg:
+ pr_err("DMA map userptr failed: %d\n", ret);
+ sg_free_table(ttm->sg);
+free_sg:
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ return ret;
+}
+
+static int
+kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/**
+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns the DOORBELL does not require DMA
+ * mapping, because the request does not go through the PCIe root complex; it
+ * loops back instead. The need to DMA map arises only when accessing a peer
+ * device's DOORBELL.
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped regardless of
+ * device ownership, because access requests for MMIO go through the PCIe root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ * in updating requesting device's page table
+ * - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ * accessible. This allows an update of requesting device's page table
+ * with entries associated with DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * - Mapping of DOORBELL or MMIO BO of same or peer device
+ * - Validating an evicted DOORBELL or MMIO BO on the device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+ dma_addr_t dma_addr;
+ bool mmio;
+ int ret;
+
+ /* Expect the SG table of the dmamap BO to be NULL */
+ mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+ if (unlikely(ttm->sg)) {
+ pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
+ return -EINVAL;
+ }
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+ pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+ pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+ dma_addr = dma_map_resource(adev->dev, dma_addr,
+ mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ ret = dma_mapping_error(adev->dev, dma_addr);
+ if (unlikely(ret))
+ return ret;
+ pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+ ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+ if (unlikely(!ttm->sg)) {
+ ret = -ENOMEM;
+ goto unmap_sg;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret))
+ goto free_sg;
+
+ return ret;
+
+free_sg:
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+unmap_sg:
+ dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+ dir, DMA_ATTR_SKIP_CPU_SYNC);
+ return ret;
+}
+
+static int
+kfd_mem_dmamap_attachment(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ switch (attachment->type) {
+ case KFD_MEM_ATT_SHARED:
+ return 0;
+ case KFD_MEM_ATT_USERPTR:
+ return kfd_mem_dmamap_userptr(mem, attachment);
+ case KFD_MEM_ATT_DMABUF:
+ return kfd_mem_dmamap_dmabuf(attachment);
+ case KFD_MEM_ATT_SG:
+ return kfd_mem_dmamap_sg_bo(mem, attachment);
+ default:
+ WARN_ON_ONCE(1);
+ }
+ return -EINVAL;
+}
+
+static void
+kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ enum dma_data_direction direction =
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ struct ttm_operation_ctx ctx = {.interruptible = false};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+
+ if (unlikely(!ttm->sg))
+ return;
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+}
+
+static void
+kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+ /* This is a no-op. We don't want to trigger eviction fences when
+ * unmapping DMABufs. Therefore the invalidation (moving to system
+ * domain) is done in kfd_mem_dmamap_dmabuf.
+ */
+}
+
+/**
+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs the following steps:
+ * - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ * - Free SG Table that is used to encapsulate DMA mapped memory of
+ * peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * - Unmapping of a DOORBELL or MMIO BO on a device having access to its memory
+ * - Eviction of a DOORBELL or MMIO BO on a device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+
+ if (unlikely(!ttm->sg)) {
+ pr_debug("SG Table of BO is NULL");
+ return;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
+ ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ bo->tbo.sg = NULL;
+}
+
+static void
+kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ switch (attachment->type) {
+ case KFD_MEM_ATT_SHARED:
+ break;
+ case KFD_MEM_ATT_USERPTR:
+ kfd_mem_dmaunmap_userptr(mem, attachment);
+ break;
+ case KFD_MEM_ATT_DMABUF:
+ kfd_mem_dmaunmap_dmabuf(attachment);
+ break;
+ case KFD_MEM_ATT_SG:
+ kfd_mem_dmaunmap_sg_bo(mem, attachment);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
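+/* kfd_mem_export_dmabuf - Lazily export a BO as a dma-buf
+ *
+ * The dma-buf is created on first use from the BO's GEM handle and cached
+ * in mem->dmabuf so that later peer-device attachments can import it.
+ */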
+static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
+{
+ if (!mem->dmabuf) {
+ struct amdgpu_device *bo_adev;
+ struct dma_buf *dmabuf;
+
+ bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file,
+ mem->gem_handle,
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DRM_RDWR : 0);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+ mem->dmabuf = dmabuf;
+ }
+
+ return 0;
+}
+
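+/* kfd_mem_attach_dmabuf - Import mem's dma-buf into @adev
+ *
+ * Exports the original BO as a dma-buf if that has not happened yet, then
+ * imports it on the attaching device to obtain a local BO that shares the
+ * original backing storage. The imported BO is marked PREEMPTIBLE.
+ */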
+static int
+kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
+ struct amdgpu_bo **bo)
+{
+ struct drm_gem_object *gobj;
+ int ret;
+
+ ret = kfd_mem_export_dmabuf(mem);
+ if (ret)
+ return ret;
+
+ gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
+ if (IS_ERR(gobj))
+ return PTR_ERR(gobj);
+
+ *bo = gem_to_amdgpu_bo(gobj);
+ (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+ return 0;
+}
+
+/* kfd_mem_attach - Add a BO to a VM
+ *
+ * Everything that needs to be done only once when a BO is first added
+ * to a VM. It can later be mapped and unmapped many times without
+ * repeating these steps.
+ *
+ * 0. Create BO for DMA mapping, if needed
+ * 1. Allocate and initialize BO VA entry data structure
+ * 2. Add BO to the VM
+ * 3. Determine ASIC-specific PTE flags
+ * 4. Alloc page tables and directories if needed
+ * 4a. Validate new page tables and directories
+ */
+static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
+ struct amdgpu_vm *vm, bool is_aql)
+{
+ struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
+ unsigned long bo_size = mem->bo->tbo.base.size;
+ uint64_t va = mem->va;
+ struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
+ struct amdgpu_bo *bo[2] = {NULL, NULL};
+ struct amdgpu_bo_va *bo_va;
+ bool same_hive = false;
+ int i, ret;
+
+ if (!va) {
+ pr_err("Invalid VA when adding BO to VM\n");
+ return -EINVAL;
+ }
+
+ /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
+ *
+ * The access path of MMIO and DOORBELL BOs is always over PCIe.
+ * In contrast, the access path of VRAM BOs depends upon the type of
+ * link that connects the peer device. Access over PCIe is allowed
+ * only if the peer device has a large BAR, whereas access over xGMI
+ * is allowed for both small and large BAR configurations of the peer
+ * device.
+ */
+ if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
+ ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
+ same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
+ if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
+ return -EINVAL;
+ }
+
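+ /* AQL queues need the BO mapped twice, so create up to two
+ * attachments; the second one covers the range starting at
+ * va + bo_size.
+ */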
+ for (i = 0; i <= is_aql; i++) {
+ attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
+ if (unlikely(!attachment[i])) {
+ ret = -ENOMEM;
+ goto unwind;
+ }
+
+ pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+ va + bo_size, vm);
+
+ if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
+ (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
+ (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) ||
+ same_hive) {
+ /* Mappings on the local GPU, VRAM mappings in the local
+ * hive, and userptr or GTT mappings that can reuse the
+ * DMA-map address space all share the original BO.
+ */
+ attachment[i]->type = KFD_MEM_ATT_SHARED;
+ bo[i] = mem->bo;
+ drm_gem_object_get(&bo[i]->tbo.base);
+ } else if (i > 0) {
+ /* Multiple mappings on the same GPU share the BO */
+ attachment[i]->type = KFD_MEM_ATT_SHARED;
+ bo[i] = bo[0];
+ drm_gem_object_get(&bo[i]->tbo.base);
+ } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+ /* Create an SG BO to DMA-map userptrs on other GPUs */
+ attachment[i]->type = KFD_MEM_ATT_USERPTR;
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
+ } else if (mem->bo->tbo.type == ttm_bo_type_sg) {
+ WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
+ "Handing invalid SG BO in ATTACH request");
+ attachment[i]->type = KFD_MEM_ATT_SG;
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ /* Enable access to GTT and VRAM BOs of peer devices */
+ } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
+ mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ attachment[i]->type = KFD_MEM_ATT_DMABUF;
+ ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+ pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
+ } else {
+ WARN_ONCE(true, "Handling invalid ATTACH request");
+ ret = -EINVAL;
+ goto unwind;
+ }
+
+ /* Add BO to VM internal data structures */
+ ret = amdgpu_bo_reserve(bo[i], false);
+ if (ret) {
+ pr_debug("Unable to reserve BO during memory attach");
+ goto unwind;
+ }
+ bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+ if (!bo_va)
+ bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+ else
+ ++bo_va->ref_count;
+ attachment[i]->bo_va = bo_va;
+ amdgpu_bo_unreserve(bo[i]);
+ if (unlikely(!attachment[i]->bo_va)) {
+ ret = -ENOMEM;
+ pr_err("Failed to add BO object to VM. ret == %d\n",
+ ret);
+ goto unwind;
+ }
+ attachment[i]->va = va;
+ attachment[i]->pte_flags = get_pte_flags(adev, vm, mem);
+ attachment[i]->adev = adev;
+ list_add(&attachment[i]->list, &mem->attachments);
+
+ va += bo_size;
+ }
+
+ return 0;
+
+unwind:
+ for (; i >= 0; i--) {
+ if (!attachment[i])
+ continue;
+ if (attachment[i]->bo_va) {
+ (void)amdgpu_bo_reserve(bo[i], true);
+ if (--attachment[i]->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+ amdgpu_bo_unreserve(bo[i]);
+ list_del(&attachment[i]->list);
+ }
+ if (bo[i])
+ drm_gem_object_put(&bo[i]->tbo.base);
+ kfree(attachment[i]);
+ }
+ return ret;
+}
+
+static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
+{
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+ pr_debug("\t remove VA 0x%llx in entry %p\n",
+ attachment->va, attachment);
+ if (--attachment->bo_va->ref_count == 0)
+ amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
+ drm_gem_object_put(&bo->tbo.base);
+ list_del(&attachment->list);
+ kfree(attachment);
+}
+
+static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
+ struct amdkfd_process_info *process_info,
+ bool userptr)
+{
+ mutex_lock(&process_info->lock);
+ if (userptr)
+ list_add_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
+ else
+ list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
+ mutex_unlock(&process_info->lock);
+}
+
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+ struct amdkfd_process_info *process_info)
+{
+ mutex_lock(&process_info->lock);
+ list_del(&mem->validate_list);
+ mutex_unlock(&process_info->lock);
+}
+
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
+ bool criu_resume)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ struct amdgpu_bo *bo = mem->bo;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_hmm_range *range;
+ int ret = 0;
+
+ mutex_lock(&process_info->lock);
+
+ ret = amdgpu_ttm_tt_set_userptr(&bo->tbo, user_addr, 0);
+ if (ret) {
+ pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+ goto out;
+ }
+
+ ret = amdgpu_hmm_register(bo, user_addr);
+ if (ret) {
+ pr_err("%s: Failed to register MMU notifier: %d\n",
+ __func__, ret);
+ goto out;
+ }
+
+ if (criu_resume) {
+ /*
+ * During a CRIU restore operation, the userptr buffer objects
+ * will be validated in the restore_userptr_work worker at a
+ * later stage when it is scheduled by another ioctl called by
+ * CRIU master process for the target pid for restore.
+ */
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
+ mutex_unlock(&process_info->lock);
+ return 0;
+ }
+
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range)) {
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (ret) {
+ amdgpu_hmm_range_free(range);
+ if (ret == -EAGAIN)
+ pr_debug("Failed to get user pages, try again\n");
+ else
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ goto unregister_out;
+ }
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("%s: Failed to reserve BO\n", __func__);
+ goto release_out;
+ }
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
+ amdgpu_bo_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ pr_err("%s: failed to validate BO\n", __func__);
+ amdgpu_bo_unreserve(bo);
+
+release_out:
+ amdgpu_hmm_range_free(range);
+unregister_out:
+ if (ret)
+ amdgpu_hmm_unregister(bo);
+out:
+ mutex_unlock(&process_info->lock);
+ return ret;
+}
+
+/* Reserving a BO and its page table BOs must happen atomically to
+ * avoid deadlocks. Some operations update multiple VMs at once. Track
+ * all the reservation info in a context structure. Optionally a sync
+ * object can track VM updates.
+ */
+struct bo_vm_reservation_context {
+ /* DRM execution context for the reservation */
+ struct drm_exec exec;
+ /* Number of VMs reserved */
+ unsigned int n_vms;
+ /* Pointer to sync object */
+ struct amdgpu_sync *sync;
+};
+
+enum bo_vm_match {
+ BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */
+ BO_VM_MAPPED, /* Match VMs where a BO is mapped */
+ BO_VM_ALL, /* Match all VMs a BO was added to */
+};
+
+/**
+ * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ */
+static int reserve_bo_and_vm(struct kgd_mem *mem,
+ struct amdgpu_vm *vm,
+ struct bo_vm_reservation_context *ctx)
+{
+ struct amdgpu_bo *bo = mem->bo;
+ int ret;
+
+ WARN_ON(!vm);
+
+ ctx->n_vms = 1;
+ ctx->sync = &mem->sync;
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+ }
+ return 0;
+
+error:
+ pr_err("Failed to reserve buffers in ttm.\n");
+ drm_exec_fini(&ctx->exec);
+ return ret;
+}
+
+/**
+ * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve. If NULL, all VMs associated with the BO are
+ * used. Otherwise, only the given VM associated with the BO is reserved.
+ * @map_type: the mapping status that will be used to filter the VMs.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ *
+ * Returns 0 for success, negative for failure.
+ */
+static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
+ struct amdgpu_vm *vm, enum bo_vm_match map_type,
+ struct bo_vm_reservation_context *ctx)
+{
+ struct kfd_mem_attachment *entry;
+ struct amdgpu_bo *bo = mem->bo;
+ int ret;
+
+ ctx->sync = &mem->sync;
+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ ctx->n_vms = 0;
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if ((vm && vm != entry->bo_va->base.vm) ||
+ (entry->is_mapped != map_type
+ && map_type != BO_VM_ALL))
+ continue;
+
+ ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
+ &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+ ++ctx->n_vms;
+ }
+
+ ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(ret))
+ goto error;
+ }
+ return 0;
+
+error:
+ pr_err("Failed to reserve buffers in ttm.\n");
+ drm_exec_fini(&ctx->exec);
+ return ret;
+}
+
+/**
+ * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
+ * @ctx: Reservation context to unreserve
+ * @wait: Optionally wait for a sync object representing pending VM updates
+ * @intr: Whether the wait is interruptible
+ *
+ * Also frees any resources allocated in
+ * reserve_bo_and_(cond_)vm(s). Returns the status from
+ * amdgpu_sync_wait.
+ */
+static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
+ bool wait, bool intr)
+{
+ int ret = 0;
+
+ if (wait)
+ ret = amdgpu_sync_wait(ctx->sync, intr);
+
+ drm_exec_fini(&ctx->exec);
+ ctx->sync = NULL;
+ return ret;
+}
+
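+/* unmap_bo_from_gpuvm - Remove an attachment's VA mapping from its VM
+ *
+ * Unmaps the BO, clears the freed page-table entries (unless the VM is
+ * already being torn down) and adds the resulting page-table update fence
+ * to @sync so callers can wait for the unmap to take effect.
+ */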
+static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
+ struct amdgpu_device *adev = entry->adev;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+
+ if (bo_va->queue_refcount) {
+ pr_debug("bo_va->queue_refcount %d\n", bo_va->queue_refcount);
+ return -EBUSY;
+ }
+
+ (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+
+ /* VM entity stopped if process killed, don't clear freed pt bo */
+ if (!amdgpu_vm_ready(vm))
+ return 0;
+
+ (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+
+ (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
+
+ return 0;
+}
+
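+/* update_gpuvm_pte - Update the page-table entries for one attachment
+ *
+ * DMA-maps the attachment for its GPU if necessary, updates the VM page
+ * tables for the BO VA and collects the page-table update fence in @sync.
+ */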
+static int update_gpuvm_pte(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
+ struct amdgpu_device *adev = entry->adev;
+ int ret;
+
+ ret = kfd_mem_dmamap_attachment(mem, entry);
+ if (ret)
+ return ret;
+
+ /* Update the page tables */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret) {
+ pr_err("amdgpu_vm_bo_update failed\n");
+ return ret;
+ }
+
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
+}
+
+static int map_bo_to_gpuvm(struct kgd_mem *mem,
+ struct kfd_mem_attachment *entry,
+ struct amdgpu_sync *sync,
+ bool no_update_pte)
+{
+ int ret;
+
+ /* Set virtual address for the allocation */
+ ret = amdgpu_vm_bo_map(entry->adev, entry->bo_va, entry->va, 0,
+ amdgpu_bo_size(entry->bo_va->base.bo),
+ entry->pte_flags);
+ if (ret) {
+ pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
+ entry->va, ret);
+ return ret;
+ }
+
+ if (no_update_pte)
+ return 0;
+
+ ret = update_gpuvm_pte(mem, entry, sync);
+ if (ret) {
+ pr_err("update_gpuvm_pte() failed\n");
+ goto update_gpuvm_pte_failed;
+ }
+
+ return 0;
+
+update_gpuvm_pte_failed:
+ unmap_bo_from_gpuvm(mem, entry, sync);
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ return ret;
+}
+
+static int process_validate_vms(struct amdkfd_process_info *process_info,
+ struct ww_acquire_ctx *ticket)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = vm_validate_pt_pd_bos(peer_vm, ticket);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
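+/* process_sync_pds_resv - Sync to pending updates of all page directories
+ *
+ * Collects fences from each peer VM's page-directory reservation that were
+ * not added by KFD itself, so a later wait on @sync covers outstanding PD
+ * updates.
+ */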
+static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_bo *pd = peer_vm->root.bo;
+
+ ret = amdgpu_sync_resv(NULL, sync, pd->tbo.base.resv,
+ AMDGPU_SYNC_NE_OWNER,
+ AMDGPU_FENCE_OWNER_KFD);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int process_update_pds(struct amdkfd_process_info *process_info,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = vm_update_pds(peer_vm, sync);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
+ struct dma_fence **ef)
+{
+ struct amdkfd_process_info *info = NULL;
+ int ret;
+
+ if (!*process_info) {
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ mutex_init(&info->lock);
+ mutex_init(&info->notifier_lock);
+ INIT_LIST_HEAD(&info->vm_list_head);
+ INIT_LIST_HEAD(&info->kfd_bo_list);
+ INIT_LIST_HEAD(&info->userptr_valid_list);
+ INIT_LIST_HEAD(&info->userptr_inval_list);
+
+ info->eviction_fence =
+ amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+ current->mm,
+ NULL);
+ if (!info->eviction_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto create_evict_fence_fail;
+ }
+
+ info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ INIT_DELAYED_WORK(&info->restore_userptr_work,
+ amdgpu_amdkfd_restore_userptr_worker);
+
+ *process_info = info;
+ }
+
+ vm->process_info = *process_info;
+
+ /* Validate page directory and attach eviction fence */
+ ret = amdgpu_bo_reserve(vm->root.bo, true);
+ if (ret)
+ goto reserve_pd_fail;
+ ret = vm_validate_pt_pd_bos(vm, NULL);
+ if (ret) {
+ pr_err("validate_pt_pd_bos() failed\n");
+ goto validate_pd_fail;
+ }
+ ret = amdgpu_bo_sync_wait(vm->root.bo,
+ AMDGPU_FENCE_OWNER_KFD, false);
+ if (ret)
+ goto wait_pd_fail;
+ ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv,
+ &vm->process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ /* Update process info */
+ mutex_lock(&vm->process_info->lock);
+ list_add_tail(&vm->vm_list_node,
+ &(vm->process_info->vm_list_head));
+ vm->process_info->n_vms++;
+ if (ef)
+ *ef = dma_fence_get(&vm->process_info->eviction_fence->base);
+ mutex_unlock(&vm->process_info->lock);
+
+ return 0;
+
+reserve_shared_fail:
+wait_pd_fail:
+validate_pd_fail:
+ amdgpu_bo_unreserve(vm->root.bo);
+reserve_pd_fail:
+ vm->process_info = NULL;
+ if (info) {
+ dma_fence_put(&info->eviction_fence->base);
+ *process_info = NULL;
+ put_pid(info->pid);
+create_evict_fence_fail:
+ mutex_destroy(&info->lock);
+ mutex_destroy(&info->notifier_lock);
+ kfree(info);
+ }
+ return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using the following criteria
+ * @bo: Handle of buffer object being pinned
+ * @domain: Domain into which BO should be pinned
+ *
+ * - USERPTR BOs are UNPINNABLE and will return an error
+ * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count incremented. It is valid to PIN a BO multiple times
+ *
+ * Return: ZERO if successful in pinning, Non-Zero in case of error.
+ */
+static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
+{
+ int ret = 0;
+
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret))
+ return ret;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ /*
+ * If bo is not contiguous on VRAM, move to system memory first to ensure
+ * we can get contiguous VRAM space after evicting other BOs.
+ */
+ if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+ struct ttm_operation_ctx ctx = { true, false };
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret)) {
+ pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret);
+ goto out;
+ }
+ }
+ }
+
+ ret = amdgpu_bo_pin(bo, domain);
+ if (ret)
+ pr_err("Error in Pinning BO to domain: %d\n", domain);
+
+ amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+out:
+ amdgpu_bo_unreserve(bo);
+ return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins a BO using the following criteria
+ * @bo: Handle of buffer object being unpinned
+ *
+ * - Is an illegal request for USERPTR BOs and is ignored
+ * - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count decremented. Calls to UNPIN must balance calls to PIN
+ */
+static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
+{
+ int ret = 0;
+
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret))
+ return;
+
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+}
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *avm,
+ void **process_info,
+ struct dma_fence **ef)
+{
+ int ret;
+
+ /* Already a compute VM? */
+ if (avm->process_info)
+ return -EINVAL;
+
+ /* Convert VM into a compute VM */
+ ret = amdgpu_vm_make_compute(adev, avm);
+ if (ret)
+ return ret;
+
+ /* Initialize KFD part of the VM and process info */
+ ret = init_kfd_vm(avm, process_info, ef);
+ if (ret)
+ return ret;
+
+ amdgpu_vm_set_task_info(avm);
+
+ return 0;
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ struct amdkfd_process_info *process_info = vm->process_info;
+
+ if (!process_info)
+ return;
+
+ /* Update process info */
+ mutex_lock(&process_info->lock);
+ process_info->n_vms--;
+ list_del(&vm->vm_list_node);
+ mutex_unlock(&process_info->lock);
+
+ vm->process_info = NULL;
+
+ /* Release per-process resources when last compute VM is destroyed */
+ if (!process_info->n_vms) {
+ WARN_ON(!list_empty(&process_info->kfd_bo_list));
+ WARN_ON(!list_empty(&process_info->userptr_valid_list));
+ WARN_ON(!list_empty(&process_info->userptr_inval_list));
+
+ dma_fence_put(&process_info->eviction_fence->base);
+ cancel_delayed_work_sync(&process_info->restore_userptr_work);
+ put_pid(process_info->pid);
+ mutex_destroy(&process_info->lock);
+ mutex_destroy(&process_info->notifier_lock);
+ kfree(process_info);
+ }
+}
+
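+/* Return the process page-directory address. ASICs older than Vega10
+ * expect a page frame number rather than a byte address, hence the shift.
+ */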
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_bo *pd = avm->root.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+
+ if (adev->asic_type < CHIP_VEGA10)
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ return avm->pd_phys_addr;
+}
+
+void amdgpu_amdkfd_block_mmu_notifications(void *p)
+{
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ WRITE_ONCE(pinfo->block_mmu_notifications, true);
+ mutex_unlock(&pinfo->lock);
+}
+
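+/* amdgpu_amdkfd_criu_resume - Resume MMU notifications after CRIU restore
+ *
+ * Treats the userptr BOs as evicted and re-enables MMU notifications, then
+ * schedules the restore worker so the restored userptr BOs get revalidated
+ * and mapped again.
+ */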
+int amdgpu_amdkfd_criu_resume(void *p)
+{
+ int ret = 0;
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ pr_debug("scheduling work\n");
+ mutex_lock(&pinfo->notifier_lock);
+ pinfo->evicted_bos++;
+ mutex_unlock(&pinfo->notifier_lock);
+ if (!READ_ONCE(pinfo->block_mmu_notifications)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ WRITE_ONCE(pinfo->block_mmu_notifications, false);
+ queue_delayed_work(system_freezable_wq,
+ &pinfo->restore_userptr_work, 0);
+
+out_unlock:
+ mutex_unlock(&pinfo->lock);
+ return ret;
+}
+
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+ uint8_t xcp_id)
+{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
+ ssize_t available;
+ uint64_t vram_available, system_mem_available, ttm_mem_available;
+
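+ /* Estimate how much memory the process can still allocate on this
+ * partition: start from the partition size, subtract what KFD has
+ * already used and, for dGPU-style VRAM, also pinned VRAM plus the
+ * reservations for page tables and RAS. APUs that prefer GTT are
+ * additionally capped by the system and TTM memory limits.
+ */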
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id];
+ else
+ vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+ - adev->kfd.vram_used_aligned[xcp_id]
+ - atomic64_read(&adev->vram_pin_size)
+ - reserved_for_pt
+ - reserved_for_ras;
+
+ if (adev->apu_prefer_gtt) {
+ system_mem_available = no_system_mem_limit ?
+ kfd_mem_limit.max_system_mem_limit :
+ kfd_mem_limit.max_system_mem_limit -
+ kfd_mem_limit.system_mem_used;
+
+ ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
+ kfd_mem_limit.ttm_mem_used;
+
+ available = min3(system_mem_available, ttm_mem_available,
+ vram_available);
+ available = ALIGN_DOWN(available, PAGE_SIZE);
+ } else {
+ available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
+ }
+
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ if (available < 0)
+ available = 0;
+
+ return available;
+}
+
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ struct amdgpu_device *adev, uint64_t va, uint64_t size,
+ void *drm_priv, struct kgd_mem **mem,
+ uint64_t *offset, uint32_t flags, bool criu_resume)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm);
+ enum ttm_bo_type bo_type = ttm_bo_type_device;
+ struct sg_table *sg = NULL;
+ uint64_t user_addr = 0;
+ struct amdgpu_bo *bo;
+ struct drm_gem_object *gobj = NULL;
+ u32 domain, alloc_domain;
+ uint64_t aligned_size;
+ int8_t xcp_id = -1;
+ u64 alloc_flags;
+ int ret;
+
+ /*
+ * Check on which domain to allocate BO
+ */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ if (adev->apu_prefer_gtt) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else {
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+ alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
+
+ /* For contiguous VRAM allocation */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
+ alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ }
+ xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
+ 0 : fpriv->xcp_id;
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+ alloc_flags = AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ if (!offset || !*offset)
+ return -EINVAL;
+ user_addr = untagged_addr(*offset);
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ bo_type = ttm_bo_type_sg;
+ if (size > UINT_MAX)
+ return -EINVAL;
+ sg = create_sg_table(*offset, size);
+ if (!sg)
+ return -ENOMEM;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+ alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
+ alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ INIT_LIST_HEAD(&(*mem)->attachments);
+ mutex_init(&(*mem)->lock);
+ (*mem)->aql_queue = !!(flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
+
+ /* Workaround for AQL queue wraparound bug. Map the same
+ * memory twice. That means we only actually allocate half
+ * the memory.
+ */
+ if ((*mem)->aql_queue)
+ size >>= 1;
+ aligned_size = PAGE_ALIGN(size);
+
+ (*mem)->alloc_flags = flags;
+
+ amdgpu_sync_create(&(*mem)->sync);
+
+ ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
+ xcp_id);
+ if (ret) {
+ pr_debug("Insufficient memory\n");
+ goto err_reserve_limit;
+ }
+
+ pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n",
+ va, (*mem)->aql_queue ? size << 1 : size,
+ domain_string(alloc_domain), xcp_id);
+
+ ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
+ bo_type, NULL, &gobj, xcp_id + 1);
+ if (ret) {
+ pr_debug("Failed to create BO on domain %s. ret %d\n",
+ domain_string(alloc_domain), ret);
+ goto err_bo_create;
+ }
+ ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
+ if (ret) {
+ pr_debug("Failed to allow vma node access. ret %d\n", ret);
+ goto err_node_allow;
+ }
+ ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
+ if (ret)
+ goto err_gem_handle_create;
+ bo = gem_to_amdgpu_bo(gobj);
+ if (bo_type == ttm_bo_type_sg) {
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ }
+ bo->kfd_bo = *mem;
+ (*mem)->bo = bo;
+ if (user_addr)
+ bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
+
+ (*mem)->va = va;
+ (*mem)->domain = domain;
+ (*mem)->mapped_to_gpu_memory = 0;
+ (*mem)->process_info = avm->process_info;
+
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+ if (user_addr) {
+ pr_debug("creating userptr BO for user_addr = %llx\n", user_addr);
+ ret = init_user_pages(*mem, user_addr, criu_resume);
+ if (ret)
+ goto allocate_init_user_pages_failed;
+ } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Pinning MMIO/DOORBELL BO during ALLOC FAILED\n");
+ goto err_pin_bo;
+ }
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ } else {
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_validate_bo;
+ }
+
+ if (offset)
+ *offset = amdgpu_bo_mmap_offset(bo);
+
+ return 0;
+
+allocate_init_user_pages_failed:
+err_pin_bo:
+err_validate_bo:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle);
+err_gem_handle_create:
+ drm_vma_node_revoke(&gobj->vma_node, drm_priv);
+err_node_allow:
+ /* Don't unreserve system mem limit twice */
+ goto err_reserve_limit;
+err_bo_create:
+ amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
+err_reserve_limit:
+ amdgpu_sync_free(&(*mem)->sync);
+ mutex_destroy(&(*mem)->lock);
+ if (gobj)
+ drm_gem_object_put(gobj);
+ else
+ kfree(*mem);
+err:
+ if (sg) {
+ sg_free_table(sg);
+ kfree(sg);
+ }
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
+ uint64_t *size)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ unsigned long bo_size = mem->bo->tbo.base.size;
+ bool use_release_notifier = (mem->bo->kfd_bo == mem);
+ struct kfd_mem_attachment *entry, *tmp;
+ struct bo_vm_reservation_context ctx;
+ unsigned int mapped_to_gpu_memory;
+ int ret;
+ bool is_imported = false;
+
+ mutex_lock(&mem->lock);
+
+ /* Unpin MMIO/DOORBELL BOs that were pinned during allocation */
+ if (mem->alloc_flags &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+ amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+ }
+
+ mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
+ is_imported = mem->is_imported;
+ mutex_unlock(&mem->lock);
+ /* lock is not needed after this, since mem is unused and will
+ * be freed anyway
+ */
+
+ if (mapped_to_gpu_memory > 0) {
+ pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
+ mem->va, bo_size);
+ return -EBUSY;
+ }
+
+ /* Make sure restore workers don't access the BO any more */
+ mutex_lock(&process_info->lock);
+ list_del(&mem->validate_list);
+ mutex_unlock(&process_info->lock);
+
+ /* Cleanup user pages and MMU notifiers */
+ if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+ amdgpu_hmm_unregister(mem->bo);
+ mutex_lock(&process_info->notifier_lock);
+ amdgpu_hmm_range_free(mem->range);
+ mutex_unlock(&process_info->notifier_lock);
+ }
+
+ ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
+ if (unlikely(ret))
+ return ret;
+
+ amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+ process_info->eviction_fence);
+ pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue));
+
+ /* Remove from VM internal data structures */
+ list_for_each_entry_safe(entry, tmp, &mem->attachments, list) {
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ kfd_mem_detach(entry);
+ }
+
+ ret = unreserve_bo_and_vms(&ctx, false, false);
+
+ /* Free the sync object */
+ amdgpu_sync_free(&mem->sync);
+
+ /* If the SG is not NULL, it's one we created for a doorbell or mmio
+ * remap BO. We need to free it.
+ */
+ if (mem->bo->tbo.sg) {
+ sg_free_table(mem->bo->tbo.sg);
+ kfree(mem->bo->tbo.sg);
+ }
+
+ /* Update the size of the BO being freed if it was allocated from
+ * VRAM and is not imported. For APP APUs, VRAM allocations are done
+ * in the GTT domain.
+ */
+ if (size) {
+ if (!is_imported &&
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+ *size = bo_size;
+ else
+ *size = 0;
+ }
+
+ /* Free the BO */
+ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+ drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
+ if (mem->dmabuf) {
+ dma_buf_put(mem->dmabuf);
+ mem->dmabuf = NULL;
+ }
+ mutex_destroy(&mem->lock);
+
+ /* If this releases the last reference, it will end up calling
+ * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
+ * this needs to be the last call here.
+ */
+ drm_gem_object_put(&mem->bo->tbo.base);
+
+ /*
+ * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(),
+ * explicitly free it here.
+ */
+ if (!use_release_notifier)
+ kfree(mem);
+
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem,
+ void *drm_priv)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ int ret;
+ struct amdgpu_bo *bo;
+ uint32_t domain;
+ struct kfd_mem_attachment *entry;
+ struct bo_vm_reservation_context ctx;
+ unsigned long bo_size;
+ bool is_invalid_userptr = false;
+
+ bo = mem->bo;
+ if (!bo) {
+ pr_err("Invalid BO when mapping memory to GPU\n");
+ return -EINVAL;
+ }
+
+ /* Make sure restore is not running concurrently. Since we
+ * don't map invalid userptr BOs, we rely on the next restore
+ * worker to do the mapping
+ */
+ mutex_lock(&mem->process_info->lock);
+
+ /* Lock notifier lock. If we find an invalid userptr BO, we can be
+ * sure that the MMU notifier is no longer running
+ * concurrently and the queues are actually stopped
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ mutex_lock(&mem->process_info->notifier_lock);
+ is_invalid_userptr = !!mem->invalid;
+ mutex_unlock(&mem->process_info->notifier_lock);
+ }
+
+ mutex_lock(&mem->lock);
+
+ domain = mem->domain;
+ bo_size = bo->tbo.base.size;
+
+ pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
+ mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue),
+ avm, domain_string(domain));
+
+ if (!kfd_mem_is_attached(avm, mem)) {
+ ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
+ if (ret)
+ goto out;
+ }
+
+ ret = reserve_bo_and_vm(mem, avm, &ctx);
+ if (unlikely(ret))
+ goto out;
+
+ /* Userptr can be marked as "not invalid", but not actually be
+ * validated yet (still in the system domain). In that case
+ * the queues are still stopped and we can leave mapping for
+ * the next restore worker
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
+ bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
+ is_invalid_userptr = true;
+
+ ret = vm_validate_pt_pd_bos(avm, NULL);
+ if (unlikely(ret))
+ goto out_unreserve;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != avm || entry->is_mapped)
+ continue;
+
+ pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
+ entry->va, entry->va + bo_size, entry);
+
+ ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
+ is_invalid_userptr);
+ if (ret) {
+ pr_err("Failed to map bo to gpuvm\n");
+ goto out_unreserve;
+ }
+
+ ret = vm_update_pds(avm, ctx.sync);
+ if (ret) {
+ pr_err("Failed to update page directories\n");
+ goto out_unreserve;
+ }
+
+ entry->is_mapped = true;
+ mem->mapped_to_gpu_memory++;
+ pr_debug("\t INC mapping count %d\n",
+ mem->mapped_to_gpu_memory);
+ }
+
+ ret = unreserve_bo_and_vms(&ctx, false, false);
+
+ goto out;
+
+out_unreserve:
+ unreserve_bo_and_vms(&ctx, false, false);
+out:
+ mutex_unlock(&mem->process_info->lock);
+ mutex_unlock(&mem->lock);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+{
+ struct kfd_mem_attachment *entry;
+ struct amdgpu_vm *vm;
+ int ret;
+
+ vm = drm_priv_to_vm(drm_priv);
+
+ mutex_lock(&mem->lock);
+
+ ret = amdgpu_bo_reserve(mem->bo, true);
+ if (ret)
+ goto out;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != vm)
+ continue;
+ if (entry->bo_va->base.bo->tbo.ttm &&
+ !entry->bo_va->base.bo->tbo.ttm->sg)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, entry);
+ }
+
+ amdgpu_bo_unreserve(mem->bo);
+out:
+ mutex_unlock(&mem->lock);
+
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ unsigned long bo_size = mem->bo->tbo.base.size;
+ struct kfd_mem_attachment *entry;
+ struct bo_vm_reservation_context ctx;
+ int ret;
+
+ mutex_lock(&mem->lock);
+
+ ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
+ if (unlikely(ret))
+ goto out;
+ /* If no VMs were reserved, it means the BO wasn't actually mapped */
+ if (ctx.n_vms == 0) {
+ ret = -EINVAL;
+ goto unreserve_out;
+ }
+
+ ret = vm_validate_pt_pd_bos(avm, NULL);
+ if (unlikely(ret))
+ goto unreserve_out;
+
+ pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
+ mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue),
+ avm);
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->bo_va->base.vm != avm || !entry->is_mapped)
+ continue;
+
+ pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
+ entry->va, entry->va + bo_size, entry);
+
+ ret = unmap_bo_from_gpuvm(mem, entry, ctx.sync);
+ if (ret)
+ goto unreserve_out;
+
+ entry->is_mapped = false;
+
+ mem->mapped_to_gpu_memory--;
+ pr_debug("\t DEC mapping count %d\n",
+ mem->mapped_to_gpu_memory);
+ }
+
+unreserve_out:
+ unreserve_bo_and_vms(&ctx, false, false);
+out:
+ mutex_unlock(&mem->lock);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_sync_memory(
+ struct amdgpu_device *adev, struct kgd_mem *mem, bool intr)
+{
+ struct amdgpu_sync sync;
+ int ret;
+
+ amdgpu_sync_create(&sync);
+
+ mutex_lock(&mem->lock);
+ amdgpu_sync_clone(&mem->sync, &sync);
+ mutex_unlock(&mem->lock);
+
+ ret = amdgpu_sync_wait(&sync, intr);
+ amdgpu_sync_free(&sync);
+ return ret;
+}
+
+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
+ * @bo: Buffer object to be mapped
+ * @bo_gart: Return bo reference
+ *
+ * Before return, bo reference count is incremented. To release the reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart)
+{
+ int ret;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto err_reserve_bo_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto err_pin_bo_failed;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (ret) {
+ pr_err("Failed to bind bo to GART. ret %d\n", ret);
+ goto err_map_bo_gart_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, bo->vm_bo->vm->process_info->eviction_fence);
+
+ amdgpu_bo_unreserve(bo);
+
+ *bo_gart = amdgpu_bo_ref(bo);
+
+ return 0;
+
+err_map_bo_gart_failed:
+ amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+ amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+ return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be mapped for CPU access
+ * @kptr[out]: pointer in kernel CPU address space
+ * @size[out]: size of the buffer
+ *
+ * Pins the BO and maps it for kernel CPU access. The eviction fence is removed
+ * from the BO, since pinned BOs cannot be evicted. The bo must remain on the
+ * validate_list, so the GPU mapping can be restored after a page table was
+ * evicted.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size)
+{
+ int ret;
+ struct amdgpu_bo *bo = mem->bo;
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ pr_err("userptr can't be mapped to kernel\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&mem->process_info->lock);
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto bo_reserve_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto pin_failed;
+ }
+
+ ret = amdgpu_bo_kmap(bo, kptr);
+ if (ret) {
+ pr_err("Failed to map bo to kernel. ret %d\n", ret);
+ goto kmap_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, mem->process_info->eviction_fence);
+
+ if (size)
+ *size = amdgpu_bo_size(bo);
+
+ amdgpu_bo_unreserve(bo);
+
+ mutex_unlock(&mem->process_info->lock);
+ return 0;
+
+kmap_failed:
+ amdgpu_bo_unpin(bo);
+pin_failed:
+ amdgpu_bo_unreserve(bo);
+bo_reserve_failed:
+ mutex_unlock(&mem->process_info->lock);
+
+ return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel() - Unmap a GTT BO from kernel CPU access
+ *
+ * @mem: Buffer object to be unmapped for CPU access
+ *
+ * Removes the kernel CPU mapping and unpins the BO. It does not restore the
+ * eviction fence, so this function should only be used for cleanup before the
+ * BO is destroyed.
+ */
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
+{
+ struct amdgpu_bo *bo = mem->bo;
+
+ (void)amdgpu_bo_reserve(bo, true);
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+}
+
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
+ struct kfd_vm_fault_info *mem)
+{
+ if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
+ *mem = *adev->gmc.vm_fault_info;
+ atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
+ }
+ return 0;
+}
+
+static int import_obj_create(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf,
+ struct drm_gem_object *obj,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdgpu_bo *bo;
+ int ret;
+
+ bo = gem_to_amdgpu_bo(obj);
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT)))
+ /* Only VRAM and GTT BOs are supported */
+ return -EINVAL;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -ENOMEM;
+
+ ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
+ if (ret)
+ goto err_free_mem;
+
+ if (size)
+ *size = amdgpu_bo_size(bo);
+
+ if (mmap_offset)
+ *mmap_offset = amdgpu_bo_mmap_offset(bo);
+
+ INIT_LIST_HEAD(&(*mem)->attachments);
+ mutex_init(&(*mem)->lock);
+
+ (*mem)->alloc_flags =
+ ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
+ | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
+ | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+
+ get_dma_buf(dma_buf);
+ (*mem)->dmabuf = dma_buf;
+ (*mem)->bo = bo;
+ (*mem)->va = va;
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
+ !adev->apu_prefer_gtt ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
+ (*mem)->mapped_to_gpu_memory = 0;
+ (*mem)->process_info = avm->process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+ (*mem)->is_imported = true;
+
+ mutex_lock(&avm->process_info->lock);
+ if (avm->process_info->eviction_fence &&
+ !dma_fence_is_signaled(&avm->process_info->eviction_fence->base))
+ ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
+ &avm->process_info->eviction_fence->base);
+ mutex_unlock(&avm->process_info->lock);
+ if (ret)
+ goto err_remove_mem;
+
+ return 0;
+
+err_remove_mem:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&obj->vma_node, drm_priv);
+err_free_mem:
+ kfree(*mem);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
+ uint64_t va, void *drm_priv,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct drm_gem_object *obj;
+ uint32_t handle;
+ int ret;
+
+ ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
+ &handle);
+ if (ret)
+ return ret;
+ obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
+ if (!obj) {
+ ret = -EINVAL;
+ goto err_release_handle;
+ }
+
+ ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size,
+ mmap_offset);
+ if (ret)
+ goto err_put_obj;
+
+ (*mem)->gem_handle = handle;
+
+ return 0;
+
+err_put_obj:
+ drm_gem_object_put(obj);
+err_release_handle:
+ drm_gem_handle_delete(adev->kfd.client.file, handle);
+ return ret;
+}
+
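+/* Export a kgd_mem as a DMA-buf. Returns the DMA-buf with an additional
+ * reference that the caller is responsible for dropping.
+ */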
+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
+ struct dma_buf **dma_buf)
+{
+ int ret;
+
+ mutex_lock(&mem->lock);
+ ret = kfd_mem_export_dmabuf(mem);
+ if (ret)
+ goto out;
+
+ get_dma_buf(mem->dmabuf);
+ *dma_buf = mem->dmabuf;
+out:
+ mutex_unlock(&mem->lock);
+ return ret;
+}
+
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+ unsigned long cur_seq, struct kgd_mem *mem)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ int r = 0;
+
+ /* Do not process MMU notifications during CRIU restore until
+ * KFD_CRIU_OP_RESUME IOCTL is received
+ */
+ if (READ_ONCE(process_info->block_mmu_notifications))
+ return 0;
+
+ mutex_lock(&process_info->notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ mem->invalid++;
+ if (++process_info->evicted_bos == 1) {
+ /* First eviction, stop the queues */
+ r = kgd2kfd_quiesce_mm(mni->mm,
+ KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
+
+ if (r && r != -ESRCH)
+ pr_err("Failed to quiesce KFD\n");
+
+ if (r != -ESRCH)
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+ }
+ mutex_unlock(&process_info->notifier_lock);
+
+ return r;
+}
+
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+ struct mm_struct *mm)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ uint32_t invalid;
+ int ret = 0;
+
+ mutex_lock(&process_info->notifier_lock);
+
+ /* Move all invalidated BOs to the userptr_inval_list */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_valid_list,
+ validate_list)
+ if (mem->invalid)
+ list_move_tail(&mem->validate_list,
+ &process_info->userptr_inval_list);
+
+ /* Go through userptr_inval_list and update any invalid user_pages */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list) {
+ invalid = mem->invalid;
+ if (!invalid)
+ /* BO hasn't been invalidated since the last
+ * revalidation attempt. Keep its page list.
+ */
+ continue;
+
+ bo = mem->bo;
+
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
+
+ /* BO reservations and getting user pages (hmm_range_fault)
+ * must happen outside the notifier lock
+ */
+ mutex_unlock(&process_info->notifier_lock);
+
+ /* Move the BO to system (CPU) domain if necessary to unmap
+ * and free the SG table
+ */
+ if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) {
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+ }
+
+ mem->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!mem->range))
+ return -ENOMEM;
+ /* Get updated user pages */
+ ret = amdgpu_ttm_tt_get_user_pages(bo, mem->range);
+ if (ret) {
+ amdgpu_hmm_range_free(mem->range);
+ mem->range = NULL;
+ pr_debug("Failed %d to get user pages\n", ret);
+
+ /* Treat a -EFAULT bad address error as success. It will
+ * fail later with a VM fault if the GPU tries to access
+ * it. Better than hanging indefinitely with stalled
+ * user mode queues.
+ *
+ * Return other errors (-EBUSY or -ENOMEM) to retry the restore
+ */
+ if (ret != -EFAULT)
+ return ret;
+
+ /* If applications unmap memory before destroying the userptr
+ * from the KFD, trigger a segmentation fault in VM debug mode.
+ */
+ if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+ struct kfd_process *p;
+
+ pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
+ pid_nr(process_info->pid), mem->va);
+
+ /* Send GPU VM fault to user space */
+ p = kfd_lookup_process_by_pid(process_info->pid);
+ if (p) {
+ kfd_signal_vm_fault_event_with_userptr(p, mem->va);
+ kfd_unref_process(p);
+ }
+ }
+
+ ret = 0;
+ }
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->range);
+
+ mutex_lock(&process_info->notifier_lock);
+
+ /* Mark the BO as valid unless it was invalidated
+ * again concurrently.
+ */
+ if (mem->invalid != invalid) {
+ ret = -EAGAIN;
+ goto unlock_out;
+ }
+ /* Set mem valid only if it has an hmm range associated */
+ if (mem->range)
+ mem->invalid = 0;
+ }
+
+unlock_out:
+ mutex_unlock(&process_info->notifier_lock);
+
+ return ret;
+}
+
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables
+ * with new page addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_sync sync;
+ struct drm_exec exec;
+
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ amdgpu_sync_create(&sync);
+
+ drm_exec_init(&exec, 0, 0);
+ /* Reserve all BOs and page tables for validation */
+ drm_exec_until_all_locked(&exec) {
+ /* Reserve all the page directories */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
+
+ /* Reserve the userptr_inval_list entries to resv_list */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
+
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unreserve_out;
+ }
+ }
+
+ ret = process_validate_vms(process_info, NULL);
+ if (ret)
+ goto unreserve_out;
+
+ /* Validate BOs and update GPUVM page tables */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
+ bo = mem->bo;
+
+ /* Validate the BO if we got user pages */
+ if (bo->tbo.ttm->pages[0]) {
+ amdgpu_bo_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret) {
+ pr_err("%s: failed to validate BO\n", __func__);
+ goto unreserve_out;
+ }
+ }
+
+ /* Update mapping. If the BO was not validated
+ * (because we couldn't get user pages), this will
+ * clear the page table entries, which will result in
+ * VM faults if the GPU tries to access the invalid
+ * memory.
+ */
+ list_for_each_entry(attachment, &mem->attachments, list) {
+ if (!attachment->is_mapped)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, attachment);
+ ret = update_gpuvm_pte(mem, attachment, &sync);
+ if (ret) {
+ pr_err("%s: update PTE failed\n", __func__);
+ /* make sure this gets validated again */
+ mutex_lock(&process_info->notifier_lock);
+ mem->invalid++;
+ mutex_unlock(&process_info->notifier_lock);
+ goto unreserve_out;
+ }
+ }
+ }
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+ drm_exec_fini(&exec);
+ amdgpu_sync_wait(&sync, false);
+ amdgpu_sync_free(&sync);
+
+ return ret;
+}
+
+/* Confirm that all user pages are valid while holding the notifier lock
+ *
+ * Moves valid BOs from the userptr_inval_list back to the userptr_valid_list.
+ */
+static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ int ret = 0;
+
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list) {
+ bool valid;
+
+ /* Keep mem without an hmm range on the userptr_inval_list */
+ if (!mem->range)
+ continue;
+
+ /* Only check mem with hmm range associated */
+ valid = amdgpu_hmm_range_valid(mem->range);
+ amdgpu_hmm_range_free(mem->range);
+
+ mem->range = NULL;
+ if (!valid) {
+ WARN(!mem->invalid, "Invalid BO not marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+
+ if (mem->invalid) {
+ WARN(1, "Valid BO is marked invalid");
+ ret = -EAGAIN;
+ continue;
+ }
+
+ list_move_tail(&mem->validate_list,
+ &process_info->userptr_valid_list);
+ }
+
+ return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info =
+ container_of(dwork, struct amdkfd_process_info,
+ restore_userptr_work);
+ struct task_struct *usertask;
+ struct mm_struct *mm;
+ uint32_t evicted_bos;
+
+ mutex_lock(&process_info->notifier_lock);
+ evicted_bos = process_info->evicted_bos;
+ mutex_unlock(&process_info->notifier_lock);
+ if (!evicted_bos)
+ return;
+
+ /* Reference task and mm in case of concurrent process termination */
+ usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+ if (!usertask)
+ return;
+ mm = get_task_mm(usertask);
+ if (!mm) {
+ put_task_struct(usertask);
+ return;
+ }
+
+ mutex_lock(&process_info->lock);
+
+ if (update_invalid_user_pages(process_info, mm))
+ goto unlock_out;
+ /* userptr_inval_list can be empty if all evicted userptr BOs
+ * have been freed. In that case there is nothing to validate
+ * and we can just restart the queues.
+ */
+ if (!list_empty(&process_info->userptr_inval_list)) {
+ if (validate_invalid_user_pages(process_info))
+ goto unlock_out;
+ }
+ /* Final check for concurrent eviction and atomic update. If
+ * another eviction happens after successful update, it will
+ * be a first eviction that calls quiesce_mm. The eviction
+ * reference counting inside KFD will handle this case.
+ */
+ mutex_lock(&process_info->notifier_lock);
+ if (process_info->evicted_bos != evicted_bos)
+ goto unlock_notifier_out;
+
+ if (confirm_valid_user_pages_locked(process_info)) {
+ WARN(1, "User pages unexpectedly invalid");
+ goto unlock_notifier_out;
+ }
+
+ process_info->evicted_bos = evicted_bos = 0;
+
+ if (kgd2kfd_resume_mm(mm)) {
+ pr_err("%s: Failed to resume KFD\n", __func__);
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ }
+
+unlock_notifier_out:
+ mutex_unlock(&process_info->notifier_lock);
+unlock_out:
+ mutex_unlock(&process_info->lock);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_bos) {
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ kfd_smi_event_queue_restore_rescheduled(mm);
+ }
+ mmput(mm);
+ put_task_struct(usertask);
+}
+
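+/* Publish a new eviction fence pointer for the process (protected by
+ * process_info->lock) and drop the reference on the fence it replaces.
+ */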
+static void replace_eviction_fence(struct dma_fence __rcu **ef,
+ struct dma_fence *new_ef)
+{
+ struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true
+ /* protected by process_info->lock */);
+
+ /* If we're replacing an unsignaled eviction fence, that fence will
+ * never be signaled, and if anyone is still waiting on that fence,
+ * they will hang forever. This should never happen. We should only
+ * replace the fence in restore_work, which only gets scheduled after
+ * the eviction work has signaled the fence.
+ */
+ WARN_ONCE(!dma_fence_is_signaled(old_ef),
+ "Replacing unsignaled eviction fence");
+ dma_fence_put(old_ef);
+}
+
+/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
+ * KFD process identified by process_info
+ *
+ * @process_info: amdkfd_process_info of the KFD process
+ *
+ * After memory eviction, the restore thread calls this function. The function
+ * should be called while the process is still valid. BO restore involves:
+ *
+ * 1. Release old eviction fence and create new one
+ * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
+ * 3. Use the second PD list and kfd_bo_list to create a list (ctx.list) of
+ * BOs that need to be reserved.
+ * 4. Reserve all the BOs
+ * 5. Validate PD and PT BOs.
+ * 6. Validate all KFD BOs using kfd_bo_list, map them and add a new fence
+ * 7. Add fence to all PD and PT BOs.
+ * 8. Unreserve all BOs
+ */
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef)
+{
+ struct amdkfd_process_info *process_info = info;
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem;
+ struct list_head duplicate_save;
+ struct amdgpu_sync sync_obj;
+ unsigned long failed_size = 0;
+ unsigned long total_size = 0;
+ struct drm_exec exec;
+ int ret;
+
+ INIT_LIST_HEAD(&duplicate_save);
+
+ mutex_lock(&process_info->lock);
+
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret)) {
+ pr_err("Locking VM PD failed, ret: %d\n", ret);
+ goto ttm_reserve_fail;
+ }
+ }
+
+ /* Reserve all BOs and page tables/directory. Add all BOs from
+ * kfd_bo_list to ctx.list
+ */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct drm_gem_object *gobj;
+
+ gobj = &mem->bo->tbo.base;
+ ret = drm_exec_prepare_obj(&exec, gobj, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret)) {
+ pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret);
+ goto ttm_reserve_fail;
+ }
+ }
+ }
+
+ amdgpu_sync_create(&sync_obj);
+
+ /* Validate BOs managed by KFD */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+
+ struct amdgpu_bo *bo = mem->bo;
+ uint32_t domain = mem->domain;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+
+ total_size += amdgpu_bo_size(bo);
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
+ if (ret) {
+ pr_debug("Memory eviction: Validate BOs failed\n");
+ failed_size += amdgpu_bo_size(bo);
+ ret = amdgpu_amdkfd_bo_validate(bo,
+ AMDGPU_GEM_DOMAIN_GTT, false);
+ if (ret) {
+ pr_debug("Memory eviction: Try again\n");
+ goto validate_map_fail;
+ }
+ }
+ dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL, fence) {
+ ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
+ if (ret) {
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
+ goto validate_map_fail;
+ }
+ }
+ }
+
+ if (failed_size)
+ pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
+ /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
+ * validations above would invalidate DMABuf imports again.
+ */
+ ret = process_validate_vms(process_info, &exec.ticket);
+ if (ret) {
+ pr_debug("Validating VMs failed, ret: %d\n", ret);
+ goto validate_map_fail;
+ }
+
+ /* Update mappings managed by KFD. */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
+ list_for_each_entry(attachment, &mem->attachments, list) {
+ if (!attachment->is_mapped)
+ continue;
+
+ kfd_mem_dmaunmap_attachment(mem, attachment);
+ ret = update_gpuvm_pte(mem, attachment, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: update PTE failed. Try again\n");
+ goto validate_map_fail;
+ }
+ }
+ }
+
+ /* Update mappings not managed by KFD */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_device *adev = amdgpu_ttm_adev(
+ peer_vm->root.bo->tbo.bdev);
+
+ struct amdgpu_fpriv *fpriv =
+ container_of(peer_vm, struct amdgpu_fpriv, vm);
+
+ ret = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle PRT moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+
+ ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket);
+ if (ret) {
+ dev_dbg(adev->dev,
+ "Memory eviction: handle moved failed, pid %8d. Try again.\n",
+ pid_nr(process_info->pid));
+ goto validate_map_fail;
+ }
+ }
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: update PDs failed. Try again\n");
+ goto validate_map_fail;
+ }
+
+ /* Sync with fences on all the page tables. They implicitly depend on any
+ * move fences from amdgpu_vm_handle_moved above.
+ */
+ ret = process_sync_pds_resv(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+ goto validate_map_fail;
+ }
+
+ /* Wait for validate and PT updates to finish */
+ amdgpu_sync_wait(&sync_obj, false);
+
+ /* The old eviction fence may be unsignaled if restore happens
+ * after a GPU reset or suspend/resume. Keep the old fence in that
+ * case. Otherwise release the old eviction fence and create a new
+ * one, because a fence only goes from unsignaled to signaled once
+ * and cannot be reused. Use context and mm from the old fence.
+ *
+ * If an old eviction fence signals after this check, that's OK.
+ * Anyone signaling an eviction fence must stop the queues first
+ * and schedule another restore worker.
+ */
+ if (dma_fence_is_signaled(&process_info->eviction_fence->base)) {
+ struct amdgpu_amdkfd_fence *new_fence =
+ amdgpu_amdkfd_fence_create(
+ process_info->eviction_fence->base.context,
+ process_info->eviction_fence->mm,
+ NULL);
+
+ if (!new_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto validate_map_fail;
+ }
+ dma_fence_put(&process_info->eviction_fence->base);
+ process_info->eviction_fence = new_fence;
+ replace_eviction_fence(ef, dma_fence_get(&new_fence->base));
+ } else {
+ WARN_ONCE(*ef != &process_info->eviction_fence->base,
+ "KFD eviction fence doesn't match KGD process_info");
+ }
+
+ /* Attach new eviction fence to all BOs except pinned ones */
+ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
+ if (mem->bo->tbo.pin_count)
+ continue;
+
+ dma_resv_add_fence(mem->bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ /* Attach eviction fence to PD / PT BOs and DMABuf imports */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_bo *bo = peer_vm->root.bo;
+
+ dma_resv_add_fence(bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+
+validate_map_fail:
+ amdgpu_sync_free(&sync_obj);
+ttm_reserve_fail:
+ drm_exec_fini(&exec);
+ mutex_unlock(&process_info->lock);
+ return ret;
+}
+
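+/* Wrap the global GWS BO in a kgd_mem for this process, validate it in the
+ * GWS domain and attach the process eviction fence to it.
+ */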
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
+{
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
+ int ret;
+
+ if (!info || !gws)
+ return -EINVAL;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -ENOMEM;
+
+ mutex_init(&(*mem)->lock);
+ INIT_LIST_HEAD(&(*mem)->attachments);
+ (*mem)->bo = amdgpu_bo_ref(gws_bo);
+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
+ (*mem)->process_info = process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+
+
+ /* Validate the GWS BO the first time it is added to the process */
+ mutex_lock(&(*mem)->process_info->lock);
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ goto bo_reservation_failure;
+ }
+
+ ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
+ if (ret) {
+ pr_err("GWS BO validate failed %d\n", ret);
+ goto bo_validation_failure;
+ }
+ /* The GWS resource is shared between amdgpu and amdkfd.
+ * Add the process eviction fence to the BO so they can
+ * evict each other.
+ */
+ ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
+ dma_resv_add_fence(gws_bo->tbo.base.resv,
+ &process_info->eviction_fence->base,
+ DMA_RESV_USAGE_BOOKKEEP);
+ amdgpu_bo_unreserve(gws_bo);
+ mutex_unlock(&(*mem)->process_info->lock);
+
+ return ret;
+
+reserve_shared_fail:
+bo_validation_failure:
+ amdgpu_bo_unreserve(gws_bo);
+bo_reservation_failure:
+ mutex_unlock(&(*mem)->process_info->lock);
+ amdgpu_sync_free(&(*mem)->sync);
+ remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&(*mem)->lock);
+ kfree(*mem);
+ *mem = NULL;
+ return ret;
+}
+
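+/* Undo amdgpu_amdkfd_add_gws_to_process(): remove the eviction fence from the
+ * GWS BO and release the kgd_mem wrapper and BO reference.
+ */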
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
+{
+ int ret;
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+ struct amdgpu_bo *gws_bo = kgd_mem->bo;
+
+ /* Remove BO from process's validate list so restore worker won't touch
+ * it anymore
+ */
+ remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ /* TODO: add BO back to validate_list? */
+ return ret;
+ }
+ amdgpu_amdkfd_remove_eviction_fence(gws_bo,
+ process_info->eviction_fence);
+ amdgpu_bo_unreserve(gws_bo);
+ amdgpu_sync_free(&kgd_mem->sync);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&kgd_mem->lock);
+ kfree(mem);
+ return 0;
+}
+
+/* Returns GPU-specific tiling mode information */
+int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
+ struct tile_config *config)
+{
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ /* Those values are not set from GFX9 onwards */
+ config->num_banks = adev->gfx.config.num_banks;
+ config->num_ranks = adev->gfx.config.num_ranks;
+
+ return 0;
+}
+
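+/* Return true if mem has at least one mapped attachment in the VM that
+ * belongs to drm_priv.
+ */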
+bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem)
+{
+ struct amdgpu_vm *vm = drm_priv_to_vm(drm_priv);
+ struct kfd_mem_attachment *entry;
+
+ list_for_each_entry(entry, &mem->attachments, list) {
+ if (entry->is_mapped && entry->bo_va->base.vm == vm)
+ return true;
+ }
+ return false;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data)
+{
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ seq_printf(m, "System mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.system_mem_used >> 20),
+ (kfd_mem_limit.max_system_mem_limit >> 20));
+ seq_printf(m, "TTM mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.ttm_mem_used >> 20),
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
new file mode 100644
index 000000000000..763f2b8dcf13
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -0,0 +1,1951 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_i2c.h"
+#include "amdgpu_display.h"
+
+#include "atom.h"
+#include "atom-bits.h"
+#include "atombios_encoders.h"
+#include "bif/bif_4_1_d.h"
+
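+/* Decode an ATOM GPIO I2C assignment from the BIOS into the driver's i2c bus
+ * record: register indices, bit masks and access capabilities.
+ */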
+static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio)
+{
+ struct amdgpu_i2c_bus_rec i2c;
+
+ memset(&i2c, 0, sizeof(struct amdgpu_i2c_bus_rec));
+
+ i2c.mask_clk_reg = le16_to_cpu(gpio->usClkMaskRegisterIndex);
+ i2c.mask_data_reg = le16_to_cpu(gpio->usDataMaskRegisterIndex);
+ i2c.en_clk_reg = le16_to_cpu(gpio->usClkEnRegisterIndex);
+ i2c.en_data_reg = le16_to_cpu(gpio->usDataEnRegisterIndex);
+ i2c.y_clk_reg = le16_to_cpu(gpio->usClkY_RegisterIndex);
+ i2c.y_data_reg = le16_to_cpu(gpio->usDataY_RegisterIndex);
+ i2c.a_clk_reg = le16_to_cpu(gpio->usClkA_RegisterIndex);
+ i2c.a_data_reg = le16_to_cpu(gpio->usDataA_RegisterIndex);
+ i2c.mask_clk_mask = (1 << gpio->ucClkMaskShift);
+ i2c.mask_data_mask = (1 << gpio->ucDataMaskShift);
+ i2c.en_clk_mask = (1 << gpio->ucClkEnShift);
+ i2c.en_data_mask = (1 << gpio->ucDataEnShift);
+ i2c.y_clk_mask = (1 << gpio->ucClkY_Shift);
+ i2c.y_data_mask = (1 << gpio->ucDataY_Shift);
+ i2c.a_clk_mask = (1 << gpio->ucClkA_Shift);
+ i2c.a_data_mask = (1 << gpio->ucDataA_Shift);
+
+ if (gpio->sucI2cId.sbfAccess.bfHW_Capable)
+ i2c.hw_capable = true;
+ else
+ i2c.hw_capable = false;
+
+ if (gpio->sucI2cId.ucAccess == 0xa0)
+ i2c.mm_i2c = true;
+ else
+ i2c.mm_i2c = false;
+
+ i2c.i2c_id = gpio->sucI2cId.ucAccess;
+
+ if (i2c.mask_clk_reg)
+ i2c.valid = true;
+ else
+ i2c.valid = false;
+
+ return i2c;
+}
+
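+/* Look up the entry with the given i2c id in the BIOS GPIO_I2C_Info table and
+ * return it as a decoded bus record (i2c.valid is false if not found).
+ */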
+struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev,
+ uint8_t id)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ ATOM_GPIO_I2C_ASSIGMENT *gpio;
+ struct amdgpu_i2c_bus_rec i2c;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+ struct _ATOM_GPIO_I2C_INFO *i2c_info;
+ uint16_t data_offset, size;
+ int i, num_indices;
+
+ memset(&i2c, 0, sizeof(struct amdgpu_i2c_bus_rec));
+ i2c.valid = false;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+ gpio = &i2c_info->asGPIO_Info[0];
+ for (i = 0; i < num_indices; i++) {
+ if (gpio->sucI2cId.ucAccess == id) {
+ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+ break;
+ }
+ gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+ }
+ }
+
+ return i2c;
+}
+
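+/* Register an i2c bus for every valid entry in the BIOS GPIO_I2C_Info table. */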
+void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ ATOM_GPIO_I2C_ASSIGMENT *gpio;
+ struct amdgpu_i2c_bus_rec i2c;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+ struct _ATOM_GPIO_I2C_INFO *i2c_info;
+ uint16_t data_offset, size;
+ int i, num_indices;
+ char stmp[32];
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+ gpio = &i2c_info->asGPIO_Info[0];
+ for (i = 0; i < num_indices; i++) {
+ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+
+ if (i2c.valid) {
+ sprintf(stmp, "0x%x", i2c.i2c_id);
+ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp);
+ }
+ gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+ }
+ }
+}
+
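+/* Register only the i2c bus whose BIOS id matches i2c_id, for OEM use. */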
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ ATOM_GPIO_I2C_ASSIGMENT *gpio;
+ struct amdgpu_i2c_bus_rec i2c;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+ struct _ATOM_GPIO_I2C_INFO *i2c_info;
+ uint16_t data_offset, size;
+ int i, num_indices;
+ char stmp[32];
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+ gpio = &i2c_info->asGPIO_Info[0];
+ for (i = 0; i < num_indices; i++) {
+ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+
+ if (i2c.valid && i2c.i2c_id == i2c_id) {
+ sprintf(stmp, "OEM 0x%x", i2c.i2c_id);
+ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp);
+ break;
+ }
+ gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+ }
+ }
+}
+
+struct amdgpu_gpio_rec
+amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
+ u8 id)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ struct amdgpu_gpio_rec gpio;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_Pin_LUT);
+ struct _ATOM_GPIO_PIN_LUT *gpio_info;
+ ATOM_GPIO_PIN_ASSIGNMENT *pin;
+ u16 data_offset, size;
+ int i, num_indices;
+
+ memset(&gpio, 0, sizeof(struct amdgpu_gpio_rec));
+ gpio.valid = false;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ gpio_info = (struct _ATOM_GPIO_PIN_LUT *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_PIN_ASSIGNMENT);
+
+ pin = gpio_info->asGPIO_Pin;
+ for (i = 0; i < num_indices; i++) {
+ if (id == pin->ucGPIO_ID) {
+ gpio.id = pin->ucGPIO_ID;
+ gpio.reg = le16_to_cpu(pin->usGpioPin_AIndex);
+ gpio.shift = pin->ucGpioPinBitShift;
+ gpio.mask = (1 << pin->ucGpioPinBitShift);
+ gpio.valid = true;
+ break;
+ }
+ pin = (ATOM_GPIO_PIN_ASSIGNMENT *)
+ ((u8 *)pin + sizeof(ATOM_GPIO_PIN_ASSIGNMENT));
+ }
+ }
+
+ return gpio;
+}
+
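+/* Map a GPIO pin to an HPD line by comparing it against the HPD GPIO register
+ * and decoding the bit position of its mask.
+ */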
+static struct amdgpu_hpd
+amdgpu_atombios_get_hpd_info_from_gpio(struct amdgpu_device *adev,
+ struct amdgpu_gpio_rec *gpio)
+{
+ struct amdgpu_hpd hpd;
+ u32 reg;
+
+ memset(&hpd, 0, sizeof(struct amdgpu_hpd));
+
+ reg = amdgpu_display_hpd_get_gpio_reg(adev);
+
+ hpd.gpio = *gpio;
+ if (gpio->reg == reg) {
+ switch (gpio->mask) {
+ case (1 << 0):
+ hpd.hpd = AMDGPU_HPD_1;
+ break;
+ case (1 << 8):
+ hpd.hpd = AMDGPU_HPD_2;
+ break;
+ case (1 << 16):
+ hpd.hpd = AMDGPU_HPD_3;
+ break;
+ case (1 << 24):
+ hpd.hpd = AMDGPU_HPD_4;
+ break;
+ case (1 << 26):
+ hpd.hpd = AMDGPU_HPD_5;
+ break;
+ case (1 << 28):
+ hpd.hpd = AMDGPU_HPD_6;
+ break;
+ default:
+ hpd.hpd = AMDGPU_HPD_NONE;
+ break;
+ }
+ } else
+ hpd.hpd = AMDGPU_HPD_NONE;
+ return hpd;
+}
+
+static const int object_connector_convert[] = {
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_DVII,
+ DRM_MODE_CONNECTOR_DVII,
+ DRM_MODE_CONNECTOR_DVID,
+ DRM_MODE_CONNECTOR_DVID,
+ DRM_MODE_CONNECTOR_VGA,
+ DRM_MODE_CONNECTOR_Composite,
+ DRM_MODE_CONNECTOR_SVIDEO,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_9PinDIN,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_HDMIA,
+ DRM_MODE_CONNECTOR_HDMIB,
+ DRM_MODE_CONNECTOR_LVDS,
+ DRM_MODE_CONNECTOR_9PinDIN,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_Unknown,
+ DRM_MODE_CONNECTOR_DisplayPort,
+ DRM_MODE_CONNECTOR_eDP,
+ DRM_MODE_CONNECTOR_Unknown
+};
+
+bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx = mode_info->atom_context;
+ int index = GetIndexIntoMasterTable(DATA, Object_Header);
+ u16 size, data_offset;
+ u8 frev, crev;
+ ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj;
+ ATOM_OBJECT_HEADER *obj_header;
+
+ if (!amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset))
+ return false;
+
+ if (crev < 2)
+ return false;
+
+ obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset);
+ path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usDisplayPathTableOffset));
+
+ if (path_obj->ucNumOfDispPath)
+ return true;
+ else
+ return false;
+}
+
+bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx = mode_info->atom_context;
+ int index = GetIndexIntoMasterTable(DATA, Object_Header);
+ u16 size, data_offset;
+ u8 frev, crev;
+ ATOM_CONNECTOR_OBJECT_TABLE *con_obj;
+ ATOM_ENCODER_OBJECT_TABLE *enc_obj;
+ ATOM_OBJECT_TABLE *router_obj;
+ ATOM_DISPLAY_OBJECT_PATH_TABLE *path_obj;
+ ATOM_OBJECT_HEADER *obj_header;
+ int i, j, k, path_size, device_support;
+ int connector_type;
+ u16 conn_id, connector_object_id;
+ struct amdgpu_i2c_bus_rec ddc_bus;
+ struct amdgpu_router router;
+ struct amdgpu_gpio_rec gpio;
+ struct amdgpu_hpd hpd;
+
+ if (!amdgpu_atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset))
+ return false;
+
+ if (crev < 2)
+ return false;
+
+ obj_header = (ATOM_OBJECT_HEADER *) (ctx->bios + data_offset);
+ path_obj = (ATOM_DISPLAY_OBJECT_PATH_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usDisplayPathTableOffset));
+ con_obj = (ATOM_CONNECTOR_OBJECT_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usConnectorObjectTableOffset));
+ enc_obj = (ATOM_ENCODER_OBJECT_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usEncoderObjectTableOffset));
+ router_obj = (ATOM_OBJECT_TABLE *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(obj_header->usRouterObjectTableOffset));
+ device_support = le16_to_cpu(obj_header->usDeviceSupport);
+
+ path_size = 0;
+ for (i = 0; i < path_obj->ucNumOfDispPath; i++) {
+ uint8_t *addr = (uint8_t *) path_obj->asDispPath;
+ ATOM_DISPLAY_OBJECT_PATH *path;
+ addr += path_size;
+ path = (ATOM_DISPLAY_OBJECT_PATH *) addr;
+ path_size += le16_to_cpu(path->usSize);
+
+ if (device_support & le16_to_cpu(path->usDeviceTag)) {
+ uint8_t con_obj_id =
+ (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
+ >> OBJECT_ID_SHIFT;
+
+ /* Skip TV/CV support */
+ if ((le16_to_cpu(path->usDeviceTag) ==
+ ATOM_DEVICE_TV1_SUPPORT) ||
+ (le16_to_cpu(path->usDeviceTag) ==
+ ATOM_DEVICE_CV_SUPPORT))
+ continue;
+
+ if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
+ DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
+ con_obj_id, le16_to_cpu(path->usDeviceTag));
+ continue;
+ }
+
+ connector_type =
+ object_connector_convert[con_obj_id];
+ connector_object_id = con_obj_id;
+
+ if (connector_type == DRM_MODE_CONNECTOR_Unknown)
+ continue;
+
+ router.ddc_valid = false;
+ router.cd_valid = false;
+ for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) {
+ uint8_t grph_obj_type =
+ (le16_to_cpu(path->usGraphicObjIds[j]) &
+ OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
+
+ if (grph_obj_type == GRAPH_OBJECT_TYPE_ENCODER) {
+ for (k = 0; k < enc_obj->ucNumberOfObjects; k++) {
+ u16 encoder_obj = le16_to_cpu(enc_obj->asObjects[k].usObjectID);
+ if (le16_to_cpu(path->usGraphicObjIds[j]) == encoder_obj) {
+ ATOM_COMMON_RECORD_HEADER *record = (ATOM_COMMON_RECORD_HEADER *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(enc_obj->asObjects[k].usRecordOffset));
+ ATOM_ENCODER_CAP_RECORD *cap_record;
+ u16 caps = 0;
+
+ while (record->ucRecordSize > 0 &&
+ record->ucRecordType > 0 &&
+ record->ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) {
+ switch (record->ucRecordType) {
+ case ATOM_ENCODER_CAP_RECORD_TYPE:
+ cap_record =(ATOM_ENCODER_CAP_RECORD *)
+ record;
+ caps = le16_to_cpu(cap_record->usEncoderCap);
+ break;
+ }
+ record = (ATOM_COMMON_RECORD_HEADER *)
+ ((char *)record + record->ucRecordSize);
+ }
+ amdgpu_display_add_encoder(adev, encoder_obj,
+ le16_to_cpu(path->usDeviceTag),
+ caps);
+ }
+ }
+ } else if (grph_obj_type == GRAPH_OBJECT_TYPE_ROUTER) {
+ for (k = 0; k < router_obj->ucNumberOfObjects; k++) {
+ u16 router_obj_id = le16_to_cpu(router_obj->asObjects[k].usObjectID);
+ if (le16_to_cpu(path->usGraphicObjIds[j]) == router_obj_id) {
+ ATOM_COMMON_RECORD_HEADER *record = (ATOM_COMMON_RECORD_HEADER *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(router_obj->asObjects[k].usRecordOffset));
+ ATOM_I2C_RECORD *i2c_record;
+ ATOM_I2C_ID_CONFIG_ACCESS *i2c_config;
+ ATOM_ROUTER_DDC_PATH_SELECT_RECORD *ddc_path;
+ ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD *cd_path;
+ ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT *router_src_dst_table =
+ (ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(router_obj->asObjects[k].usSrcDstTableOffset));
+ u8 *num_dst_objs = (u8 *)
+ ((u8 *)router_src_dst_table + 1 +
+ (router_src_dst_table->ucNumberOfSrc * 2));
+ u16 *dst_objs = (u16 *)(num_dst_objs + 1);
+ int enum_id;
+
+ router.router_id = router_obj_id;
+ for (enum_id = 0; enum_id < (*num_dst_objs); enum_id++) {
+ if (le16_to_cpu(path->usConnObjectId) ==
+ le16_to_cpu(dst_objs[enum_id]))
+ break;
+ }
+
+ while (record->ucRecordSize > 0 &&
+ record->ucRecordType > 0 &&
+ record->ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) {
+ switch (record->ucRecordType) {
+ case ATOM_I2C_RECORD_TYPE:
+ i2c_record =
+ (ATOM_I2C_RECORD *)
+ record;
+ i2c_config =
+ (ATOM_I2C_ID_CONFIG_ACCESS *)
+ &i2c_record->sucI2cId;
+ router.i2c_info =
+ amdgpu_atombios_lookup_i2c_gpio(adev,
+ i2c_config->
+ ucAccess);
+ router.i2c_addr = i2c_record->ucI2CAddr >> 1;
+ break;
+ case ATOM_ROUTER_DDC_PATH_SELECT_RECORD_TYPE:
+ ddc_path = (ATOM_ROUTER_DDC_PATH_SELECT_RECORD *)
+ record;
+ router.ddc_valid = true;
+ router.ddc_mux_type = ddc_path->ucMuxType;
+ router.ddc_mux_control_pin = ddc_path->ucMuxControlPin;
+ router.ddc_mux_state = ddc_path->ucMuxState[enum_id];
+ break;
+ case ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD_TYPE:
+ cd_path = (ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD *)
+ record;
+ router.cd_valid = true;
+ router.cd_mux_type = cd_path->ucMuxType;
+ router.cd_mux_control_pin = cd_path->ucMuxControlPin;
+ router.cd_mux_state = cd_path->ucMuxState[enum_id];
+ break;
+ }
+ record = (ATOM_COMMON_RECORD_HEADER *)
+ ((char *)record + record->ucRecordSize);
+ }
+ }
+ }
+ }
+ }
+
+ /* look up gpio for ddc, hpd */
+ ddc_bus.valid = false;
+ hpd.hpd = AMDGPU_HPD_NONE;
+ if ((le16_to_cpu(path->usDeviceTag) &
+ (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) == 0) {
+ for (j = 0; j < con_obj->ucNumberOfObjects; j++) {
+ if (le16_to_cpu(path->usConnObjectId) ==
+ le16_to_cpu(con_obj->asObjects[j].
+ usObjectID)) {
+ ATOM_COMMON_RECORD_HEADER
+ *record =
+ (ATOM_COMMON_RECORD_HEADER
+ *)
+ (ctx->bios + data_offset +
+ le16_to_cpu(con_obj->
+ asObjects[j].
+ usRecordOffset));
+ ATOM_I2C_RECORD *i2c_record;
+ ATOM_HPD_INT_RECORD *hpd_record;
+ ATOM_I2C_ID_CONFIG_ACCESS *i2c_config;
+
+ while (record->ucRecordSize > 0 &&
+ record->ucRecordType > 0 &&
+ record->ucRecordType <= ATOM_MAX_OBJECT_RECORD_NUMBER) {
+ switch (record->ucRecordType) {
+ case ATOM_I2C_RECORD_TYPE:
+ i2c_record =
+ (ATOM_I2C_RECORD *)
+ record;
+ i2c_config =
+ (ATOM_I2C_ID_CONFIG_ACCESS *)
+ &i2c_record->sucI2cId;
+ ddc_bus = amdgpu_atombios_lookup_i2c_gpio(adev,
+ i2c_config->
+ ucAccess);
+ break;
+ case ATOM_HPD_INT_RECORD_TYPE:
+ hpd_record =
+ (ATOM_HPD_INT_RECORD *)
+ record;
+ gpio = amdgpu_atombios_lookup_gpio(adev,
+ hpd_record->ucHPDIntGPIOID);
+ hpd = amdgpu_atombios_get_hpd_info_from_gpio(adev, &gpio);
+ hpd.plugged_state = hpd_record->ucPlugged_PinState;
+ break;
+ }
+ record =
+ (ATOM_COMMON_RECORD_HEADER
+ *) ((char *)record
+ +
+ record->
+ ucRecordSize);
+ }
+ break;
+ }
+ }
+ }
+
+ /* needed for aux chan transactions */
+ ddc_bus.hpd = hpd.hpd;
+
+ conn_id = le16_to_cpu(path->usConnObjectId);
+
+ amdgpu_display_add_connector(adev,
+ conn_id,
+ le16_to_cpu(path->usDeviceTag),
+ connector_type, &ddc_bus,
+ connector_object_id,
+ &hpd,
+ &router);
+
+ }
+ }
+
+ amdgpu_link_encoder_connector(adev_to_drm(adev));
+
+ return true;
+}
+
+union firmware_info {
+ ATOM_FIRMWARE_INFO info;
+ ATOM_FIRMWARE_INFO_V1_2 info_12;
+ ATOM_FIRMWARE_INFO_V1_3 info_13;
+ ATOM_FIRMWARE_INFO_V1_4 info_14;
+ ATOM_FIRMWARE_INFO_V2_1 info_21;
+ ATOM_FIRMWARE_INFO_V2_2 info_22;
+};
+
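+/* Parse the FirmwareInfo table and fill in the PLL limits and the default
+ * engine, memory and display clocks for the device.
+ */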
+int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ int ret = -EINVAL;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ int i;
+ struct amdgpu_pll *ppll = &adev->clock.ppll[0];
+ struct amdgpu_pll *spll = &adev->clock.spll;
+ struct amdgpu_pll *mpll = &adev->clock.mpll;
+ union firmware_info *firmware_info =
+ (union firmware_info *)(mode_info->atom_context->bios +
+ data_offset);
+ /* pixel clocks */
+ ppll->reference_freq =
+ le16_to_cpu(firmware_info->info.usReferenceClock);
+ ppll->reference_div = 0;
+
+ ppll->pll_out_min =
+ le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
+ ppll->pll_out_max =
+ le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
+
+ ppll->lcd_pll_out_min =
+ le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
+ if (ppll->lcd_pll_out_min == 0)
+ ppll->lcd_pll_out_min = ppll->pll_out_min;
+ ppll->lcd_pll_out_max =
+ le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
+ if (ppll->lcd_pll_out_max == 0)
+ ppll->lcd_pll_out_max = ppll->pll_out_max;
+
+ if (ppll->pll_out_min == 0)
+ ppll->pll_out_min = 64800;
+
+ ppll->pll_in_min =
+ le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Input);
+ ppll->pll_in_max =
+ le16_to_cpu(firmware_info->info.usMaxPixelClockPLL_Input);
+
+ ppll->min_post_div = 2;
+ ppll->max_post_div = 0x7f;
+ ppll->min_frac_feedback_div = 0;
+ ppll->max_frac_feedback_div = 9;
+ ppll->min_ref_div = 2;
+ ppll->max_ref_div = 0x3ff;
+ ppll->min_feedback_div = 4;
+ ppll->max_feedback_div = 0xfff;
+ ppll->best_vco = 0;
+
+ for (i = 1; i < AMDGPU_MAX_PPLL; i++)
+ adev->clock.ppll[i] = *ppll;
+
+ /* system clock */
+ spll->reference_freq =
+ le16_to_cpu(firmware_info->info_21.usCoreReferenceClock);
+ spll->reference_div = 0;
+
+ spll->pll_out_min =
+ le16_to_cpu(firmware_info->info.usMinEngineClockPLL_Output);
+ spll->pll_out_max =
+ le32_to_cpu(firmware_info->info.ulMaxEngineClockPLL_Output);
+
+ /* ??? */
+ if (spll->pll_out_min == 0)
+ spll->pll_out_min = 64800;
+
+ spll->pll_in_min =
+ le16_to_cpu(firmware_info->info.usMinEngineClockPLL_Input);
+ spll->pll_in_max =
+ le16_to_cpu(firmware_info->info.usMaxEngineClockPLL_Input);
+
+ spll->min_post_div = 1;
+ spll->max_post_div = 1;
+ spll->min_ref_div = 2;
+ spll->max_ref_div = 0xff;
+ spll->min_feedback_div = 4;
+ spll->max_feedback_div = 0xff;
+ spll->best_vco = 0;
+
+ /* memory clock */
+ mpll->reference_freq =
+ le16_to_cpu(firmware_info->info_21.usMemoryReferenceClock);
+ mpll->reference_div = 0;
+
+ mpll->pll_out_min =
+ le16_to_cpu(firmware_info->info.usMinMemoryClockPLL_Output);
+ mpll->pll_out_max =
+ le32_to_cpu(firmware_info->info.ulMaxMemoryClockPLL_Output);
+
+ /* ??? */
+ if (mpll->pll_out_min == 0)
+ mpll->pll_out_min = 64800;
+
+ mpll->pll_in_min =
+ le16_to_cpu(firmware_info->info.usMinMemoryClockPLL_Input);
+ mpll->pll_in_max =
+ le16_to_cpu(firmware_info->info.usMaxMemoryClockPLL_Input);
+
+ adev->clock.default_sclk =
+ le32_to_cpu(firmware_info->info.ulDefaultEngineClock);
+ adev->clock.default_mclk =
+ le32_to_cpu(firmware_info->info.ulDefaultMemoryClock);
+
+ mpll->min_post_div = 1;
+ mpll->max_post_div = 1;
+ mpll->min_ref_div = 2;
+ mpll->max_ref_div = 0xff;
+ mpll->min_feedback_div = 4;
+ mpll->max_feedback_div = 0xff;
+ mpll->best_vco = 0;
+
+ /* disp clock */
+ adev->clock.default_dispclk =
+ le32_to_cpu(firmware_info->info_21.ulDefaultDispEngineClkFreq);
+ /* set a reasonable default for DP */
+ if (adev->clock.default_dispclk < 53900) {
+ DRM_DEBUG("Changing default dispclk from %dMhz to 600Mhz\n",
+ adev->clock.default_dispclk / 100);
+ adev->clock.default_dispclk = 60000;
+ } else if (adev->clock.default_dispclk <= 60000) {
+ DRM_DEBUG("Changing default dispclk from %dMhz to 625Mhz\n",
+ adev->clock.default_dispclk / 100);
+ adev->clock.default_dispclk = 62500;
+ }
+ adev->clock.dp_extclk =
+ le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
+
+ adev->clock.max_pixel_clock = le16_to_cpu(firmware_info->info.usMaxPixelClock);
+ if (adev->clock.max_pixel_clock == 0)
+ adev->clock.max_pixel_clock = 40000;
+
+ /* not technically a clock, but... */
+ adev->mode_info.firmware_flags =
+ le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
+
+ ret = 0;
+ }
+
+ adev->pm.current_sclk = adev->clock.default_sclk;
+ adev->pm.current_mclk = adev->clock.default_mclk;
+
+ return ret;
+}
+
+union gfx_info {
+ ATOM_GFX_INFO_V2_1 info;
+};
+
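+/* Read the basic GFX configuration (shader engines, tile pipes, CUs, render
+ * backends, texture channel caches) from the GFX_Info table.
+ */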
+int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, GFX_Info);
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ int ret = -EINVAL;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+
+ adev->gfx.config.max_shader_engines = gfx_info->info.max_shader_engines;
+ adev->gfx.config.max_tile_pipes = gfx_info->info.max_tile_pipes;
+ adev->gfx.config.max_cu_per_sh = gfx_info->info.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->info.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->info.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches =
+ gfx_info->info.max_texture_channel_caches;
+
+ ret = 0;
+ }
+ return ret;
+}
+
+union igp_info {
+ struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_9 info_9;
+};
+
+/*
+ * Return vram width from integrated system info table, if available,
+ * or 0 if not.
+ */
+int amdgpu_atombios_get_vram_width(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+ u16 data_offset, size;
+ union igp_info *igp_info;
+ u8 frev, crev;
+
+ /* get any igp specific overrides */
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 8:
+ case 9:
+ return igp_info->info_8.ucUMAChannelNumber * 64;
+ default:
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
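+/* Apply IGP-specific spread spectrum percentage/rate overrides for TMDS, HDMI
+ * and LVDS from the IntegratedSystemInfo table, if present.
+ */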
+static void amdgpu_atombios_get_igp_ss_overrides(struct amdgpu_device *adev,
+ struct amdgpu_atom_ss *ss,
+ int id)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+ u16 data_offset, size;
+ union igp_info *igp_info;
+ u8 frev, crev;
+ u16 percentage = 0, rate = 0;
+
+ /* get any igp specific overrides */
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 6:
+ switch (id) {
+ case ASIC_INTERNAL_SS_ON_TMDS:
+ percentage = le16_to_cpu(igp_info->info_6.usDVISSPercentage);
+ rate = le16_to_cpu(igp_info->info_6.usDVISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_HDMI:
+ percentage = le16_to_cpu(igp_info->info_6.usHDMISSPercentage);
+ rate = le16_to_cpu(igp_info->info_6.usHDMISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_LVDS:
+ percentage = le16_to_cpu(igp_info->info_6.usLvdsSSPercentage);
+ rate = le16_to_cpu(igp_info->info_6.usLvdsSSpreadRateIn10Hz);
+ break;
+ }
+ break;
+ case 7:
+ switch (id) {
+ case ASIC_INTERNAL_SS_ON_TMDS:
+ percentage = le16_to_cpu(igp_info->info_7.usDVISSPercentage);
+ rate = le16_to_cpu(igp_info->info_7.usDVISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_HDMI:
+ percentage = le16_to_cpu(igp_info->info_7.usHDMISSPercentage);
+ rate = le16_to_cpu(igp_info->info_7.usHDMISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_LVDS:
+ percentage = le16_to_cpu(igp_info->info_7.usLvdsSSPercentage);
+ rate = le16_to_cpu(igp_info->info_7.usLvdsSSpreadRateIn10Hz);
+ break;
+ }
+ break;
+ case 8:
+ switch (id) {
+ case ASIC_INTERNAL_SS_ON_TMDS:
+ percentage = le16_to_cpu(igp_info->info_8.usDVISSPercentage);
+ rate = le16_to_cpu(igp_info->info_8.usDVISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_HDMI:
+ percentage = le16_to_cpu(igp_info->info_8.usHDMISSPercentage);
+ rate = le16_to_cpu(igp_info->info_8.usHDMISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_LVDS:
+ percentage = le16_to_cpu(igp_info->info_8.usLvdsSSPercentage);
+ rate = le16_to_cpu(igp_info->info_8.usLvdsSSpreadRateIn10Hz);
+ break;
+ }
+ break;
+ case 9:
+ switch (id) {
+ case ASIC_INTERNAL_SS_ON_TMDS:
+ percentage = le16_to_cpu(igp_info->info_9.usDVISSPercentage);
+ rate = le16_to_cpu(igp_info->info_9.usDVISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_HDMI:
+ percentage = le16_to_cpu(igp_info->info_9.usHDMISSPercentage);
+ rate = le16_to_cpu(igp_info->info_9.usHDMISSpreadRateIn10Hz);
+ break;
+ case ASIC_INTERNAL_SS_ON_LVDS:
+ percentage = le16_to_cpu(igp_info->info_9.usLvdsSSPercentage);
+ rate = le16_to_cpu(igp_info->info_9.usLvdsSSpreadRateIn10Hz);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unsupported IGP table: %d %d\n", frev, crev);
+ break;
+ }
+ if (percentage)
+ ss->percentage = percentage;
+ if (rate)
+ ss->rate = rate;
+ }
+}
+
+union asic_ss_info {
+ struct _ATOM_ASIC_INTERNAL_SS_INFO info;
+ struct _ATOM_ASIC_INTERNAL_SS_INFO_V2 info_2;
+ struct _ATOM_ASIC_INTERNAL_SS_INFO_V3 info_3;
+};
+
+union asic_ss_assignment {
+ struct _ATOM_ASIC_SS_ASSIGNMENT v1;
+ struct _ATOM_ASIC_SS_ASSIGNMENT_V2 v2;
+ struct _ATOM_ASIC_SS_ASSIGNMENT_V3 v3;
+};
+
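+/* Look up the internal spread spectrum settings for the given clock type and
+ * target clock in the ASIC_InternalSS_Info table. Returns true on a match.
+ */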
+bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev,
+ struct amdgpu_atom_ss *ss,
+ int id, u32 clock)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
+ uint16_t data_offset, size;
+ union asic_ss_info *ss_info;
+ union asic_ss_assignment *ss_assign;
+ uint8_t frev, crev;
+ int i, num_indices;
+
+ if (id == ASIC_INTERNAL_MEMORY_SS) {
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_MEMORY_CLOCK_SS_SUPPORT))
+ return false;
+ }
+ if (id == ASIC_INTERNAL_ENGINE_SS) {
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_ENGINE_CLOCK_SS_SUPPORT))
+ return false;
+ }
+
+ memset(ss, 0, sizeof(struct amdgpu_atom_ss));
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+
+ ss_info =
+ (union asic_ss_info *)(mode_info->atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 1:
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_ASIC_SS_ASSIGNMENT);
+
+ ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info.asSpreadSpectrum[0]);
+ for (i = 0; i < num_indices; i++) {
+ if ((ss_assign->v1.ucClockIndication == id) &&
+ (clock <= le32_to_cpu(ss_assign->v1.ulTargetClockRange))) {
+ ss->percentage =
+ le16_to_cpu(ss_assign->v1.usSpreadSpectrumPercentage);
+ ss->type = ss_assign->v1.ucSpreadSpectrumMode;
+ ss->rate = le16_to_cpu(ss_assign->v1.usSpreadRateInKhz);
+ ss->percentage_divider = 100;
+ return true;
+ }
+ ss_assign = (union asic_ss_assignment *)
+ ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT));
+ }
+ break;
+ case 2:
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2);
+ ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_2.asSpreadSpectrum[0]);
+ for (i = 0; i < num_indices; i++) {
+ if ((ss_assign->v2.ucClockIndication == id) &&
+ (clock <= le32_to_cpu(ss_assign->v2.ulTargetClockRange))) {
+ ss->percentage =
+ le16_to_cpu(ss_assign->v2.usSpreadSpectrumPercentage);
+ ss->type = ss_assign->v2.ucSpreadSpectrumMode;
+ ss->rate = le16_to_cpu(ss_assign->v2.usSpreadRateIn10Hz);
+ ss->percentage_divider = 100;
+ if ((crev == 2) &&
+ ((id == ASIC_INTERNAL_ENGINE_SS) ||
+ (id == ASIC_INTERNAL_MEMORY_SS)))
+ ss->rate /= 100;
+ return true;
+ }
+ ss_assign = (union asic_ss_assignment *)
+ ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V2));
+ }
+ break;
+ case 3:
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3);
+ ss_assign = (union asic_ss_assignment *)((u8 *)&ss_info->info_3.asSpreadSpectrum[0]);
+ for (i = 0; i < num_indices; i++) {
+ if ((ss_assign->v3.ucClockIndication == id) &&
+ (clock <= le32_to_cpu(ss_assign->v3.ulTargetClockRange))) {
+ ss->percentage =
+ le16_to_cpu(ss_assign->v3.usSpreadSpectrumPercentage);
+ ss->type = ss_assign->v3.ucSpreadSpectrumMode;
+ ss->rate = le16_to_cpu(ss_assign->v3.usSpreadRateIn10Hz);
+ if (ss_assign->v3.ucSpreadSpectrumMode &
+ SS_MODE_V3_PERCENTAGE_DIV_BY_1000_MASK)
+ ss->percentage_divider = 1000;
+ else
+ ss->percentage_divider = 100;
+ if ((id == ASIC_INTERNAL_ENGINE_SS) ||
+ (id == ASIC_INTERNAL_MEMORY_SS))
+ ss->rate /= 100;
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_atombios_get_igp_ss_overrides(adev, ss, id);
+ return true;
+ }
+ ss_assign = (union asic_ss_assignment *)
+ ((u8 *)ss_assign + sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3));
+ }
+ break;
+ default:
+ DRM_ERROR("Unsupported ASIC_InternalSS_Info table: %d %d\n", frev, crev);
+ break;
+ }
+
+ }
+ return false;
+}
+
+union get_clock_dividers {
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS v1;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2 v2;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3 v3;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 v4;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V5 v5;
+ struct _COMPUTE_GPU_CLOCK_INPUT_PARAMETERS_V1_6 v6_in;
+ struct _COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 v6_out;
+};
+
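+/* Execute the BIOS ComputeMemoryEnginePLL command table to get PLL divider
+ * settings for the requested clock; the argument layout depends on the
+ * table revision.
+ */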
+int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
+ u8 clock_type,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_clock_dividers *dividers)
+{
+ union get_clock_dividers args;
+ int index = GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL);
+ u8 frev, crev;
+
+ memset(&args, 0, sizeof(args));
+ memset(dividers, 0, sizeof(struct atom_clock_dividers));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return -EINVAL;
+
+ switch (crev) {
+ case 2:
+ case 3:
+ case 5:
+ /* r6xx, r7xx, evergreen, ni, si.
+ * TODO: add support for asic_type <= CHIP_RV770 */
+ if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
+ args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
+
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
+
+ dividers->post_div = args.v3.ucPostDiv;
+ dividers->enable_post_div = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+ dividers->enable_dithen = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+ dividers->whole_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v3.ucRefDiv;
+ dividers->vco_mode = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+ } else {
+ /* for SI we use ComputeMemoryClockParam for memory plls */
+ if (adev->asic_type >= CHIP_TAHITI)
+ return -EINVAL;
+ args.v5.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
+ if (strobe_mode)
+ args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
+
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
+
+ dividers->post_div = args.v5.ucPostDiv;
+ dividers->enable_post_div = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+ dividers->enable_dithen = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+ dividers->whole_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v5.ucRefDiv;
+ dividers->vco_mode = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+ }
+ break;
+ case 4:
+ /* fusion */
+ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
+
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
+
+ dividers->post_divider = dividers->post_div = args.v4.ucPostDiv;
+ dividers->real_clock = le32_to_cpu(args.v4.ulClock);
+ break;
+ case 6:
+ /* CI */
+ /* COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, COMPUTE_GPUCLK_INPUT_FLAG_SCLK */
+ args.v6_in.ulClock.ulComputeClockFlag = clock_type;
+ args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */
+
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
+
+ dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v6_out.ucPllRefDiv;
+ dividers->post_div = args.v6_out.ucPllPostDiv;
+ dividers->flags = args.v6_out.ucPllCntlFlag;
+ dividers->real_clock = le32_to_cpu(args.v6_out.ulClock.ulClock);
+ dividers->post_divider = args.v6_out.ulClock.ucPostDiv;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
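+
+/*
+ * Illustrative sketch (not part of this patch): a typical caller asks for
+ * engine-clock dividers and then consumes the fields it needs. The clock
+ * value below is an arbitrary example in 10 kHz units.
+ *
+ *	struct atom_clock_dividers dividers;
+ *	int r;
+ *
+ *	r = amdgpu_atombios_get_clock_dividers(adev, COMPUTE_ENGINE_PLL_PARAM,
+ *					       60000, false, &dividers);
+ *	if (!r)
+ *		DRM_DEBUG("post_div %u ref_div %u\n",
+ *			  dividers.post_div, dividers.ref_div);
+ */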
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_mpll_param *mpll_param)
+{
+ COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 args;
+ int index = GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam);
+ u8 frev, crev;
+
+ memset(&args, 0, sizeof(args));
+ memset(mpll_param, 0, sizeof(struct atom_mpll_param));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return -EINVAL;
+
+ switch (frev) {
+ case 2:
+ switch (crev) {
+ case 1:
+ /* SI */
+ args.ulClock = cpu_to_le32(clock); /* 10 khz */
+ args.ucInputFlag = 0;
+ if (strobe_mode)
+ args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN;
+
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
+
+ mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac);
+ mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv);
+ mpll_param->post_div = args.ucPostDiv;
+ mpll_param->dll_speed = args.ucDllSpeed;
+ mpll_param->bwcntl = args.ucBWCntl;
+ mpll_param->vco_mode =
+ (args.ucPllCntlFlag & MPLL_CNTL_FLAG_VCO_MODE_MASK);
+ mpll_param->yclk_sel =
+ (args.ucPllCntlFlag & MPLL_CNTL_FLAG_BYPASS_DQ_PLL) ? 1 : 0;
+ mpll_param->qdr =
+ (args.ucPllCntlFlag & MPLL_CNTL_FLAG_QDR_ENABLE) ? 1 : 0;
+ mpll_param->half_rate =
+ (args.ucPllCntlFlag & MPLL_CNTL_FLAG_AD_HALF_RATE) ? 1 : 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock)
+{
+ SET_ENGINE_CLOCK_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings);
+ u32 tmp;
+
+ memset(&args, 0, sizeof(args));
+
+ tmp = eng_clock & SET_CLOCK_FREQ_MASK;
+ tmp |= (COMPUTE_ENGINE_PLL_PARAM << 24);
+
+ args.ulTargetEngineClock = cpu_to_le32(tmp);
+ if (mem_clock)
+ args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK);
+
+ return amdgpu_atom_execute_table(adev->mode_info.atom_context, index,
+ (uint32_t *)&args, sizeof(args));
+}
+
+void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
+ u16 *vddc, u16 *vddci, u16 *mvdd)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
+ u8 frev, crev;
+ u16 data_offset;
+ union firmware_info *firmware_info;
+
+ *vddc = 0;
+ *vddci = 0;
+ *mvdd = 0;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ firmware_info =
+ (union firmware_info *)(mode_info->atom_context->bios +
+ data_offset);
+ *vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage);
+ if ((frev == 2) && (crev >= 2)) {
+ *vddci = le16_to_cpu(firmware_info->info_22.usBootUpVDDCIVoltage);
+ *mvdd = le16_to_cpu(firmware_info->info_22.usBootUpMVDDCVoltage);
+ }
+ }
+}
+
+union set_voltage {
+ struct _SET_VOLTAGE_PS_ALLOCATION alloc;
+ struct _SET_VOLTAGE_PARAMETERS v1;
+ struct _SET_VOLTAGE_PARAMETERS_V2 v2;
+ struct _SET_VOLTAGE_PARAMETERS_V1_3 v3;
+};
+
+int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
+ u16 voltage_id, u16 *voltage)
+{
+ union set_voltage args;
+ int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
+ u8 frev, crev;
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return -EINVAL;
+
+ switch (crev) {
+ case 1:
+ return -EINVAL;
+ case 2:
+ args.v2.ucVoltageType = SET_VOLTAGE_GET_MAX_VOLTAGE;
+ args.v2.ucVoltageMode = 0;
+ args.v2.usVoltageLevel = 0;
+
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
+
+ *voltage = le16_to_cpu(args.v2.usVoltageLevel);
+ break;
+ case 3:
+ args.v3.ucVoltageType = voltage_type;
+ args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL;
+ args.v3.usVoltageLevel = cpu_to_le16(voltage_id);
+
+ if (amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args)))
+ return -EINVAL;
+
+ *voltage = le16_to_cpu(args.v3.usVoltageLevel);
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
+ u16 *voltage,
+ u16 leakage_idx)
+{
+ return amdgpu_atombios_get_max_vddc(adev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
+}
+
+union voltage_object_info {
+ struct _ATOM_VOLTAGE_OBJECT_INFO v1;
+ struct _ATOM_VOLTAGE_OBJECT_INFO_V2 v2;
+ struct _ATOM_VOLTAGE_OBJECT_INFO_V3_1 v3;
+};
+
+union voltage_object {
+ struct _ATOM_VOLTAGE_OBJECT v1;
+ struct _ATOM_VOLTAGE_OBJECT_V2 v2;
+ union _ATOM_VOLTAGE_OBJECT_V3 v3;
+};
+
+
+static ATOM_VOLTAGE_OBJECT_V3 *amdgpu_atombios_lookup_voltage_object_v3(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *v3,
+ u8 voltage_type, u8 voltage_mode)
+{
+ u32 size = le16_to_cpu(v3->sHeader.usStructureSize);
+ u32 offset = offsetof(ATOM_VOLTAGE_OBJECT_INFO_V3_1, asVoltageObj[0]);
+ u8 *start = (u8 *)v3;
+
+ while (offset < size) {
+ ATOM_VOLTAGE_OBJECT_V3 *vo = (ATOM_VOLTAGE_OBJECT_V3 *)(start + offset);
+ if ((vo->asGpioVoltageObj.sHeader.ucVoltageType == voltage_type) &&
+ (vo->asGpioVoltageObj.sHeader.ucVoltageMode == voltage_mode))
+ return vo;
+ offset += le16_to_cpu(vo->asGpioVoltageObj.sHeader.usSize);
+ }
+ return NULL;
+}
+
+int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
+ u8 voltage_type,
+ u8 *svd_gpio_id, u8 *svc_gpio_id)
+{
+ int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+ union voltage_object_info *voltage_info;
+ union voltage_object *voltage_object = NULL;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ voltage_info = (union voltage_object_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 3:
+ switch (crev) {
+ case 1:
+ voltage_object = (union voltage_object *)
+ amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
+ voltage_type,
+ VOLTAGE_OBJ_SVID2);
+ if (voltage_object) {
+ *svd_gpio_id = voltage_object->v3.asSVID2Obj.ucSVDGpioId;
+ *svc_gpio_id = voltage_object->v3.asSVID2Obj.ucSVCGpioId;
+ } else {
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+
+ }
+ return 0;
+}
+
+bool
+amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
+ u8 voltage_type, u8 voltage_mode)
+{
+ int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+ union voltage_object_info *voltage_info;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ voltage_info = (union voltage_object_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 3:
+ switch (crev) {
+ case 1:
+ if (amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
+ voltage_type, voltage_mode))
+ return true;
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return false;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return false;
+ }
+
+ }
+ return false;
+}
+
+int amdgpu_atombios_get_voltage_table(struct amdgpu_device *adev,
+ u8 voltage_type, u8 voltage_mode,
+ struct atom_voltage_table *voltage_table)
+{
+ int index = GetIndexIntoMasterTable(DATA, VoltageObjectInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+ int i;
+ union voltage_object_info *voltage_info;
+ union voltage_object *voltage_object = NULL;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ voltage_info = (union voltage_object_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+
+ switch (frev) {
+ case 3:
+ switch (crev) {
+ case 1:
+ voltage_object = (union voltage_object *)
+ amdgpu_atombios_lookup_voltage_object_v3(&voltage_info->v3,
+ voltage_type, voltage_mode);
+ if (voltage_object) {
+ ATOM_GPIO_VOLTAGE_OBJECT_V3 *gpio =
+ &voltage_object->v3.asGpioVoltageObj;
+ VOLTAGE_LUT_ENTRY_V2 *lut;
+ if (gpio->ucGpioEntryNum > MAX_VOLTAGE_ENTRIES)
+ return -EINVAL;
+ lut = &gpio->asVolGpioLut[0];
+ for (i = 0; i < gpio->ucGpioEntryNum; i++) {
+ voltage_table->entries[i].value =
+ le16_to_cpu(lut->usVoltageValue);
+ voltage_table->entries[i].smio_low =
+ le32_to_cpu(lut->ulVoltageId);
+ lut = (VOLTAGE_LUT_ENTRY_V2 *)
+ ((u8 *)lut + sizeof(VOLTAGE_LUT_ENTRY_V2));
+ }
+ voltage_table->mask_low = le32_to_cpu(gpio->ulGpioMaskVal);
+ voltage_table->count = gpio->ucGpioEntryNum;
+ voltage_table->phase_delay = gpio->ucPhaseDelay;
+ return 0;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("unknown voltage object table\n");
+ return -EINVAL;
+ }
+ }
+ return -EINVAL;
+}
+
+union vram_info {
+ struct _ATOM_VRAM_INFO_V3 v1_3;
+ struct _ATOM_VRAM_INFO_V4 v1_4;
+ struct _ATOM_VRAM_INFO_HEADER_V2_1 v2_1;
+};
+
+#define MEM_ID_MASK 0xff000000
+#define MEM_ID_SHIFT 24
+#define CLOCK_RANGE_MASK 0x00ffffff
+#define CLOCK_RANGE_SHIFT 0
+#define LOW_NIBBLE_MASK 0xf
+#define DATA_EQU_PREV 0
+#define DATA_FROM_TABLE 4
+
+int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
+ u8 module_index,
+ struct atom_mc_reg_table *reg_table)
+{
+ int index = GetIndexIntoMasterTable(DATA, VRAM_Info);
+ u8 frev, crev, num_entries, t_mem_id, num_ranges = 0;
+ u32 i = 0, j;
+ u16 data_offset, size;
+ union vram_info *vram_info;
+
+ memset(reg_table, 0, sizeof(struct atom_mc_reg_table));
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset)) {
+ vram_info = (union vram_info *)
+ (adev->mode_info.atom_context->bios + data_offset);
+ switch (frev) {
+ case 1:
+ DRM_ERROR("old table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ case 2:
+ switch (crev) {
+ case 1:
+ if (module_index < vram_info->v2_1.ucNumOfVRAMModule) {
+ ATOM_INIT_REG_BLOCK *reg_block =
+ (ATOM_INIT_REG_BLOCK *)
+ ((u8 *)vram_info + le16_to_cpu(vram_info->v2_1.usMemClkPatchTblOffset));
+ ATOM_MEMORY_SETTING_DATA_BLOCK *reg_data =
+ (ATOM_MEMORY_SETTING_DATA_BLOCK *)
+ ((u8 *)reg_block + (2 * sizeof(u16)) +
+ le16_to_cpu(reg_block->usRegIndexTblSize));
+ ATOM_INIT_REG_INDEX_FORMAT *format = &reg_block->asRegIndexBuf[0];
+ num_entries = (u8)((le16_to_cpu(reg_block->usRegIndexTblSize)) /
+ sizeof(ATOM_INIT_REG_INDEX_FORMAT)) - 1;
+ if (num_entries > VBIOS_MC_REGISTER_ARRAY_SIZE)
+ return -EINVAL;
+ while (i < num_entries) {
+ if (format->ucPreRegDataLength & ACCESS_PLACEHOLDER)
+ break;
+ reg_table->mc_reg_address[i].s1 =
+ (u16)(le16_to_cpu(format->usRegIndex));
+ reg_table->mc_reg_address[i].pre_reg_data =
+ (u8)(format->ucPreRegDataLength);
+ i++;
+ format = (ATOM_INIT_REG_INDEX_FORMAT *)
+ ((u8 *)format + sizeof(ATOM_INIT_REG_INDEX_FORMAT));
+ }
+ reg_table->last = i;
+ while ((le32_to_cpu(*(u32 *)reg_data) != END_OF_REG_DATA_BLOCK) &&
+ (num_ranges < VBIOS_MAX_AC_TIMING_ENTRIES)) {
+ t_mem_id = (u8)((le32_to_cpu(*(u32 *)reg_data) & MEM_ID_MASK)
+ >> MEM_ID_SHIFT);
+ if (module_index == t_mem_id) {
+ reg_table->mc_reg_table_entry[num_ranges].mclk_max =
+ (u32)((le32_to_cpu(*(u32 *)reg_data) & CLOCK_RANGE_MASK)
+ >> CLOCK_RANGE_SHIFT);
+ for (i = 0, j = 1; i < reg_table->last; i++) {
+ if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_FROM_TABLE) {
+ reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
+ (u32)le32_to_cpu(*((u32 *)reg_data + j));
+ j++;
+ } else if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_EQU_PREV) {
+ if (i == 0)
+ continue;
+ reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
+ reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
+ }
+ }
+ num_ranges++;
+ }
+ reg_data = (ATOM_MEMORY_SETTING_DATA_BLOCK *)
+ ((u8 *)reg_data + le16_to_cpu(reg_block->usRegDataBlkSize));
+ }
+ if (le32_to_cpu(*(u32 *)reg_data) != END_OF_REG_DATA_BLOCK)
+ return -EINVAL;
+ reg_table->num_entries = num_ranges;
+ } else
+ return -EINVAL;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ return -EINVAL;
+ }
+ return 0;
+ }
+ return -EINVAL;
+}
+#endif
+
+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
+{
+ int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset))
+ return true;
+
+ return false;
+}
+
+void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
+{
+ uint32_t bios_6_scratch;
+
+ bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6);
+
+ if (lock) {
+ bios_6_scratch |= ATOM_S6_CRITICAL_STATE;
+ bios_6_scratch &= ~ATOM_S6_ACC_MODE;
+ } else {
+ bios_6_scratch &= ~ATOM_S6_CRITICAL_STATE;
+ bios_6_scratch |= ATOM_S6_ACC_MODE;
+ }
+
+ WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
+}
+
+static void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
+{
+ uint32_t bios_2_scratch, bios_6_scratch;
+
+ adev->bios_scratch_reg_offset = mmBIOS_SCRATCH_0;
+
+ bios_2_scratch = RREG32(adev->bios_scratch_reg_offset + 2);
+ bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6);
+
+ /* let the bios control the backlight */
+ bios_2_scratch &= ~ATOM_S2_VRI_BRIGHT_ENABLE;
+
+ /* tell the bios not to handle mode switching */
+ bios_6_scratch |= ATOM_S6_ACC_BLOCK_DISPLAY_SWITCH;
+
+ /* clear the vbios dpms state */
+ bios_2_scratch &= ~ATOM_S2_DEVICE_DPMS_STATE;
+
+ WREG32(adev->bios_scratch_reg_offset + 2, bios_2_scratch);
+ WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
+}
+
+void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
+ bool hung)
+{
+ u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3);
+
+ if (hung)
+ tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+ else
+ tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG;
+
+ WREG32(adev->bios_scratch_reg_offset + 3, tmp);
+}
+
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+ u32 backlight_level)
+{
+ u32 tmp = RREG32(adev->bios_scratch_reg_offset + 2);
+
+ tmp &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK;
+ tmp |= (backlight_level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT) &
+ ATOM_S2_CURRENT_BL_LEVEL_MASK;
+
+ WREG32(adev->bios_scratch_reg_offset + 2, tmp);
+}
+
+bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7);
+
+ if (tmp & ATOM_S7_ASIC_INIT_COMPLETE_MASK)
+ return false;
+ else
+ return true;
+}
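+
+/*
+ * Illustrative sketch (not part of this patch): the usual check during
+ * early hardware bring-up; asic_init() is a hypothetical stand-in for the
+ * per-ASIC init path.
+ *
+ *	if (amdgpu_atombios_scratch_need_asic_init(adev))
+ *		asic_init(adev);
+ */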
+
+/* Atom needs data in little endian format so swap as appropriate when copying
+ * data to or from atom. Note that atom operates on dw units.
+ *
+ * Use to_le=true when sending data to atom and provide at least
+ * ALIGN(num_bytes,4) bytes in the dst buffer.
+ *
+ * Use to_le=false when receiving data from atom and provide ALIGN(num_bytes,4)
+ * bytes in the src buffer.
+ */
+void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
+{
+#ifdef __BIG_ENDIAN
+ u32 src_tmp[5], dst_tmp[5];
+ int i;
+ u8 align_num_bytes = ALIGN(num_bytes, 4);
+
+ if (to_le) {
+ memcpy(src_tmp, src, num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = cpu_to_le32(src_tmp[i]);
+ memcpy(dst, dst_tmp, align_num_bytes);
+ } else {
+ memcpy(src_tmp, src, align_num_bytes);
+ for (i = 0; i < align_num_bytes / 4; i++)
+ dst_tmp[i] = le32_to_cpu(src_tmp[i]);
+ memcpy(dst, dst_tmp, num_bytes);
+ }
+#else
+ memcpy(dst, src, num_bytes);
+#endif
+}
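+
+/*
+ * Illustrative sketch (not part of this patch): preparing a 6-byte buffer
+ * for the interpreter. Since atom works on dword units, the destination
+ * must hold at least ALIGN(6, 4) = 8 bytes.
+ *
+ *	u8 msg[6] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06 };
+ *	u8 out[8];
+ *
+ *	amdgpu_atombios_copy_swap(out, msg, sizeof(msg), true);
+ */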
+
+static int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware);
+ uint16_t data_offset;
+ int usage_bytes = 0;
+ struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage;
+ u64 start_addr;
+ u64 size;
+
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
+ firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset);
+
+ DRM_DEBUG("atom firmware requested %08x %dkb\n",
+ le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware),
+ le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb));
+
+ start_addr = firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware;
+ size = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb;
+
+ if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+ (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+ /* Firmware requests VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = size << 10;
+ /* Use the default scratch size */
+ usage_bytes = 0;
+ } else {
+ usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024;
+ }
+ }
+ ctx->scratch_size_bytes = 0;
+ if (usage_bytes == 0)
+ usage_bytes = 20 * 1024;
+ /* allocate some scratch memory */
+ ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL);
+ if (!ctx->scratch)
+ return -ENOMEM;
+ ctx->scratch_size_bytes = usage_bytes;
+ return 0;
+}
+
+/* ATOM accessor methods */
+/*
+ * ATOM is an interpreted byte code stored in tables in the vbios. The
+ * driver registers callbacks to access registers and the interpreter
+ * in the driver parses the tables and executes them to program specific
+ * actions (set display modes, asic init, etc.). See amdgpu_atombios.c,
+ * atombios.h, and atom.c
+ */
+
+/**
+ * cail_pll_read - read PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the PLL register.
+ */
+static uint32_t cail_pll_read(struct card_info *info, uint32_t reg)
+{
+ return 0;
+}
+
+/**
+ * cail_pll_write - write PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+
+}
+
+/**
+ * cail_mc_read - read MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ *
+ * Provides an MC register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MC register.
+ */
+static uint32_t cail_mc_read(struct card_info *info, uint32_t reg)
+{
+ return 0;
+}
+
+/**
+ * cail_mc_write - write MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ * @val: value to write to the MC register
+ *
+ * Provides an MC register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+
+}
+
+/**
+ * cail_reg_write - write MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ * @val: value to write to the MMIO register
+ *
+ * Provides an MMIO register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = drm_to_adev(info->dev);
+
+ WREG32(reg, val);
+}
+
+/**
+ * cail_reg_read - read MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ *
+ * Provides an MMIO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MMIO register.
+ */
+static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
+{
+ struct amdgpu_device *adev = drm_to_adev(info->dev);
+ uint32_t r;
+
+ r = RREG32(reg);
+ return r;
+}
+
+static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
+}
+
+static ssize_t amdgpu_atombios_get_vbios_build(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ return sysfs_emit(buf, "%s\n", ctx->build_num);
+}
+
+static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
+ NULL);
+static DEVICE_ATTR(vbios_build, 0444, amdgpu_atombios_get_vbios_build, NULL);
+
+static struct attribute *amdgpu_vbios_version_attrs[] = {
+ &dev_attr_vbios_version.attr, &dev_attr_vbios_build.attr, NULL
+};
+
+static umode_t amdgpu_vbios_version_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr,
+ int index)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ if (attr == &dev_attr_vbios_build.attr && !strlen(ctx->build_num))
+ return 0;
+
+ return attr->mode;
+}
+
+const struct attribute_group amdgpu_vbios_version_attr_group = {
+ .attrs = amdgpu_vbios_version_attrs,
+ .is_visible = amdgpu_vbios_version_attrs_is_visible,
+};
+
+int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.atom_context)
+ return devm_device_add_group(adev->dev,
+ &amdgpu_vbios_version_attr_group);
+
+ return 0;
+}
+
+/**
+ * amdgpu_atombios_fini - free the driver info and callbacks for atombios
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the driver info and register access callbacks for the ATOM
+ * interpreter (r4xx+).
+ * Called at driver shutdown.
+ */
+void amdgpu_atombios_fini(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.atom_context) {
+ kfree(adev->mode_info.atom_context->scratch);
+ kfree(adev->mode_info.atom_context->iio);
+ }
+ kfree(adev->mode_info.atom_context);
+ adev->mode_info.atom_context = NULL;
+ kfree(adev->mode_info.atom_card_info);
+ adev->mode_info.atom_card_info = NULL;
+}
+
+/**
+ * amdgpu_atombios_init - init the driver info and callbacks for atombios
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initializes the driver info and register access callbacks for the
+ * ATOM interpreter (r4xx+).
+ * Returns 0 on success, -ENOMEM on failure.
+ * Called at driver startup.
+ */
+int amdgpu_atombios_init(struct amdgpu_device *adev)
+{
+ struct card_info *atom_card_info =
+ kzalloc(sizeof(struct card_info), GFP_KERNEL);
+
+ if (!atom_card_info)
+ return -ENOMEM;
+
+ adev->mode_info.atom_card_info = atom_card_info;
+ atom_card_info->dev = adev_to_drm(adev);
+ atom_card_info->reg_read = cail_reg_read;
+ atom_card_info->reg_write = cail_reg_write;
+ atom_card_info->mc_read = cail_mc_read;
+ atom_card_info->mc_write = cail_mc_write;
+ atom_card_info->pll_read = cail_pll_read;
+ atom_card_info->pll_write = cail_pll_write;
+
+ adev->mode_info.atom_context = amdgpu_atom_parse(atom_card_info, adev->bios);
+ if (!adev->mode_info.atom_context) {
+ amdgpu_atombios_fini(adev);
+ return -ENOMEM;
+ }
+
+ mutex_init(&adev->mode_info.atom_context->mutex);
+ if (adev->is_atom_fw) {
+ amdgpu_atomfirmware_scratch_regs_init(adev);
+ amdgpu_atomfirmware_allocate_fb_scratch(adev);
+ /* cache firmware_flags for later use */
+ adev->mode_info.firmware_flags =
+ amdgpu_atomfirmware_query_firmware_capability(adev);
+ } else {
+ amdgpu_atombios_scratch_regs_init(adev);
+ amdgpu_atombios_allocate_fb_scratch(adev);
+ }
+
+ return 0;
+}
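+
+/*
+ * Illustrative sketch (not part of this patch): the expected pairing at
+ * driver load and unload, assuming adev->bios has already been fetched.
+ *
+ *	r = amdgpu_atombios_init(adev);
+ *	if (r)
+ *		return r;
+ *	...
+ *	amdgpu_atombios_fini(adev);
+ */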
+
+int amdgpu_atombios_get_data_table(struct amdgpu_device *adev,
+ uint32_t table,
+ uint16_t *size,
+ uint8_t *frev,
+ uint8_t *crev,
+ uint8_t **addr)
+{
+ uint16_t data_start;
+
+ if (!amdgpu_atom_parse_data_header(adev->mode_info.atom_context, table,
+ size, frev, crev, &data_start))
+ return -EINVAL;
+
+ *addr = (uint8_t *)adev->mode_info.atom_context->bios + data_start;
+
+ return 0;
+}
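+
+/*
+ * Illustrative sketch (not part of this patch): fetching a data table and
+ * dispatching on its revision. parse_table() is a hypothetical helper.
+ *
+ *	uint16_t size;
+ *	uint8_t frev, crev, *addr;
+ *	int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
+ *
+ *	if (!amdgpu_atombios_get_data_table(adev, index, &size,
+ *					    &frev, &crev, &addr))
+ *		parse_table(addr, frev, crev);
+ */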
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
new file mode 100644
index 000000000000..867bc5c5ce67
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ATOMBIOS_H__
+#define __AMDGPU_ATOMBIOS_H__
+
+struct atom_clock_dividers {
+ u32 post_div;
+ union {
+ struct {
+#ifdef __BIG_ENDIAN
+ u32 reserved : 6;
+ u32 whole_fb_div : 12;
+ u32 frac_fb_div : 14;
+#else
+ u32 frac_fb_div : 14;
+ u32 whole_fb_div : 12;
+ u32 reserved : 6;
+#endif
+ };
+ u32 fb_div;
+ };
+ u32 ref_div;
+ bool enable_post_div;
+ bool enable_dithen;
+ u32 vco_mode;
+ u32 real_clock;
+ /* added for CI */
+ u32 post_divider;
+ u32 flags;
+};
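+
+/*
+ * Note (illustrative, not part of this patch): whole_fb_div and
+ * frac_fb_div are bitfields overlaid on fb_div, so the packed
+ * feedback-divider value can be read back in one go:
+ *
+ *	struct atom_clock_dividers div = {};
+ *
+ *	div.whole_fb_div = 100;
+ *	div.frac_fb_div = 50;
+ *	use_packed_fb_div(div.fb_div);
+ *
+ * use_packed_fb_div() is a hypothetical consumer of the packed value.
+ */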
+
+struct atom_mpll_param {
+ union {
+ struct {
+#ifdef __BIG_ENDIAN
+ u32 reserved : 8;
+ u32 clkfrac : 12;
+ u32 clkf : 12;
+#else
+ u32 clkf : 12;
+ u32 clkfrac : 12;
+ u32 reserved : 8;
+#endif
+ };
+ u32 fb_div;
+ };
+ u32 post_div;
+ u32 bwcntl;
+ u32 dll_speed;
+ u32 vco_mode;
+ u32 yclk_sel;
+ u32 qdr;
+ u32 half_rate;
+};
+
+#define MEM_TYPE_GDDR5 0x50
+#define MEM_TYPE_GDDR4 0x40
+#define MEM_TYPE_GDDR3 0x30
+#define MEM_TYPE_DDR2 0x20
+#define MEM_TYPE_GDDR1 0x10
+#define MEM_TYPE_DDR3 0xb0
+#define MEM_TYPE_MASK 0xf0
+
+struct atom_memory_info {
+ u8 mem_vendor;
+ u8 mem_type;
+};
+
+#define MAX_AC_TIMING_ENTRIES 16
+
+struct atom_memory_clock_range_table {
+ u8 num_entries;
+ u8 rsv[3];
+ u32 mclk[MAX_AC_TIMING_ENTRIES];
+};
+
+#define VBIOS_MC_REGISTER_ARRAY_SIZE 32
+#define VBIOS_MAX_AC_TIMING_ENTRIES 20
+
+struct atom_mc_reg_entry {
+ u32 mclk_max;
+ u32 mc_data[VBIOS_MC_REGISTER_ARRAY_SIZE];
+};
+
+struct atom_mc_register_address {
+ u16 s1;
+ u8 pre_reg_data;
+};
+
+struct atom_mc_reg_table {
+ u8 last;
+ u8 num_entries;
+ struct atom_mc_reg_entry mc_reg_table_entry[VBIOS_MAX_AC_TIMING_ENTRIES];
+ struct atom_mc_register_address mc_reg_address[VBIOS_MC_REGISTER_ARRAY_SIZE];
+};
+
+#define MAX_VOLTAGE_ENTRIES 32
+
+struct atom_voltage_table_entry {
+ u16 value;
+ u32 smio_low;
+};
+
+struct atom_voltage_table {
+ u32 count;
+ u32 mask_low;
+ u32 phase_delay;
+ struct atom_voltage_table_entry entries[MAX_VOLTAGE_ENTRIES];
+};
+
+struct amdgpu_gpio_rec
+amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
+ u8 id);
+
+struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev,
+ uint8_t id);
+void amdgpu_atombios_i2c_init(struct amdgpu_device *adev);
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id);
+
+bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev);
+
+bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *adev);
+
+int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev);
+
+int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev);
+
+int amdgpu_atombios_get_vram_width(struct amdgpu_device *adev);
+
+bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev,
+ struct amdgpu_atom_ss *ss,
+ int id, u32 clock);
+
+int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
+ u8 clock_type,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_clock_dividers *dividers);
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_mpll_param *mpll_param);
+
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock);
+
+bool
+amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
+ u8 voltage_type, u8 voltage_mode);
+
+int amdgpu_atombios_get_voltage_table(struct amdgpu_device *adev,
+ u8 voltage_type, u8 voltage_mode,
+ struct atom_voltage_table *voltage_table);
+
+int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
+ u8 module_index,
+ struct atom_mc_reg_table *reg_table);
+int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type,
+ u16 voltage_id, u16 *voltage);
+int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev,
+ u16 *voltage,
+ u16 leakage_idx);
+void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
+ u16 *vddc, u16 *vddci, u16 *mvdd);
+int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
+ u8 voltage_type,
+ u8 *svd_gpio_id, u8 *svc_gpio_id);
+#endif
+
+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
+
+void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
+void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
+ bool hung);
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+ u32 backlight_level);
+bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
+
+void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
+int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev,
+ u8 clock_type,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_clock_dividers *dividers);
+
+int amdgpu_atombios_get_data_table(struct amdgpu_device *adev,
+ uint32_t table,
+ uint16_t *size,
+ uint8_t *frev,
+ uint8_t *crev,
+ uint8_t **addr);
+
+void amdgpu_atombios_fini(struct amdgpu_device *adev);
+int amdgpu_atombios_init(struct amdgpu_device *adev);
+int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
new file mode 100644
index 000000000000..636385c80f64
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -0,0 +1,1011 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atomfirmware.h"
+#include "amdgpu_atomfirmware.h"
+#include "atom.h"
+#include "atombios.h"
+#include "soc15_hw_ip.h"
+
+union firmware_info {
+ struct atom_firmware_info_v3_1 v31;
+ struct atom_firmware_info_v3_2 v32;
+ struct atom_firmware_info_v3_3 v33;
+ struct atom_firmware_info_v3_4 v34;
+ struct atom_firmware_info_v3_5 v35;
+};
+
+/*
+ * Helper function to query firmware capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return firmware_capability in firmwareinfo table on success or 0 if not available
+ */
+uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union firmware_info *firmware_info;
+ u8 frev, crev;
+ u32 fw_cap = 0;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
+ index, &size, &frev, &crev, &data_offset)) {
+ /* support firmware_info 3.1 + */
+ if ((frev == 3 && crev >= 1) || (frev > 3)) {
+ firmware_info = (union firmware_info *)
+ (mode_info->atom_context->bios + data_offset);
+ fw_cap = le32_to_cpu(firmware_info->v31.firmware_capability);
+ }
+ }
+
+ return fw_cap;
+}
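+
+/*
+ * Illustrative sketch (not part of this patch): the returned bitmask is
+ * cached in adev->mode_info.firmware_flags at init time and then tested
+ * against the ATOM_FIRMWARE_CAP_* flags, as the helpers below do.
+ *
+ *	u32 fw_cap = amdgpu_atomfirmware_query_firmware_capability(adev);
+ *
+ *	if (fw_cap & ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION)
+ *		...;
+ */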
+
+/*
+ * Helper function to query gpu virtualization capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if gpu virtualization is supported or false if not
+ */
+bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION) ? true : false;
+}
+
+void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
+{
+ int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ uint16_t data_offset;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL,
+ NULL, NULL, &data_offset)) {
+ struct atom_firmware_info_v3_1 *firmware_info =
+ (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios +
+ data_offset);
+
+ adev->bios_scratch_reg_offset =
+ le32_to_cpu(firmware_info->bios_scratch_reg_startaddr);
+ }
+}
+
+static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
+{
+ u32 start_addr, fw_size, drv_size;
+
+ start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+ drv_size = le16_to_cpu(fw_usage->used_by_driver_in_kb);
+
+ DRM_DEBUG("atom firmware v2_1 requested %08x %dkb fw %dkb drv\n",
+ start_addr,
+ fw_size,
+ drv_size);
+
+ if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+ (u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+ /* Firmware requests VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ /* Use the default scratch size */
+ *usage_bytes = 0;
+ } else {
+ *usage_bytes = drv_size << 10;
+ }
+ return 0;
+}
+
+static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
+ struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
+{
+ u32 fw_start_addr, fw_size, drv_start_addr, drv_size;
+
+ fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
+ fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+
+ drv_start_addr = le32_to_cpu(fw_usage->driver_region0_start_address_in_kb);
+ drv_size = le32_to_cpu(fw_usage->used_by_driver_region0_in_kb);
+
+ DRM_DEBUG("atom requested fw start at %08x %dkb and drv start at %08x %dkb\n",
+ fw_start_addr,
+ fw_size,
+ drv_start_addr,
+ drv_size);
+
+ if (amdgpu_sriov_vf(adev) &&
+ ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+ /* Firmware requests VRAM reservation for SR-IOV */
+ adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.fw_vram_usage_size = fw_size << 10;
+ }
+
+ if (amdgpu_sriov_vf(adev) &&
+ ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+ ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+ /* driver requests VRAM reservation for SR-IOV */
+ adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
+ (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+ adev->mman.drv_vram_usage_size = drv_size << 10;
+ }
+
+ *usage_bytes = 0;
+ return 0;
+}
+
+int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ vram_usagebyfirmware);
+ struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
+ struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
+ u16 data_offset;
+ u8 frev, crev;
+ int usage_bytes = 0;
+
+ /* Skip atomfirmware allocation for SRIOV VFs when dynamic crit regn is enabled */
+ if (!(amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled)) {
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+ if (frev == 2 && crev == 1) {
+ fw_usage_v2_1 =
+ (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+ fw_usage_v2_1,
+ &usage_bytes);
+ } else if (frev >= 2 && crev >= 2) {
+ fw_usage_v2_2 =
+ (struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+ amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+ fw_usage_v2_2,
+ &usage_bytes);
+ }
+ }
+ }
+
+ ctx->scratch_size_bytes = 0;
+ if (usage_bytes == 0)
+ usage_bytes = 20 * 1024;
+ /* allocate some scratch memory */
+ ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL);
+ if (!ctx->scratch)
+ return -ENOMEM;
+ ctx->scratch_size_bytes = usage_bytes;
+ return 0;
+}
+
+union igp_info {
+ struct atom_integrated_system_info_v1_11 v11;
+ struct atom_integrated_system_info_v1_12 v12;
+ struct atom_integrated_system_info_v2_1 v21;
+ struct atom_integrated_system_info_v2_3 v23;
+};
+
+union umc_info {
+ struct atom_umc_info_v3_1 v31;
+ struct atom_umc_info_v3_2 v32;
+ struct atom_umc_info_v3_3 v33;
+ struct atom_umc_info_v4_0 v40;
+};
+
+union vram_info {
+ struct atom_vram_info_header_v2_3 v23;
+ struct atom_vram_info_header_v2_4 v24;
+ struct atom_vram_info_header_v2_5 v25;
+ struct atom_vram_info_header_v2_6 v26;
+ struct atom_vram_info_header_v3_0 v30;
+};
+
+union vram_module {
+ struct atom_vram_module_v9 v9;
+ struct atom_vram_module_v10 v10;
+ struct atom_vram_module_v11 v11;
+ struct atom_vram_module_v3_0 v30;
+};
+
+static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
+ int atom_mem_type)
+{
+ int vram_type;
+
+ if (adev->flags & AMD_IS_APU) {
+ switch (atom_mem_type) {
+ case Ddr2MemType:
+ case LpDdr2MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR2;
+ break;
+ case Ddr3MemType:
+ case LpDdr3MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR3;
+ break;
+ case Ddr4MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ break;
+ case LpDdr4MemType:
+ vram_type = AMDGPU_VRAM_TYPE_LPDDR4;
+ break;
+ case Ddr5MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR5;
+ break;
+ case LpDdr5MemType:
+ vram_type = AMDGPU_VRAM_TYPE_LPDDR5;
+ break;
+ default:
+ vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ break;
+ }
+ } else {
+ switch (atom_mem_type) {
+ case ATOM_DGPU_VRAM_TYPE_GDDR5:
+ vram_type = AMDGPU_VRAM_TYPE_GDDR5;
+ break;
+ case ATOM_DGPU_VRAM_TYPE_HBM2:
+ case ATOM_DGPU_VRAM_TYPE_HBM2E:
+ case ATOM_DGPU_VRAM_TYPE_HBM3:
+ vram_type = AMDGPU_VRAM_TYPE_HBM;
+ break;
+ case ATOM_DGPU_VRAM_TYPE_GDDR6:
+ vram_type = AMDGPU_VRAM_TYPE_GDDR6;
+ break;
+ case ATOM_DGPU_VRAM_TYPE_HBM3E:
+ vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+ break;
+ default:
+ vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ break;
+ }
+ }
+
+ return vram_type;
+}
+
+int
+amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type,
+ int *vram_vendor)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index, i = 0;
+ u16 data_offset, size;
+ union igp_info *igp_info;
+ union vram_info *vram_info;
+ union umc_info *umc_info;
+ union vram_module *vram_module;
+ u8 frev, crev;
+ u8 mem_type;
+ u8 mem_vendor;
+ u32 mem_channel_number;
+ u32 mem_channel_width;
+ u32 module_id;
+
+ if (adev->flags & AMD_IS_APU)
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ integratedsysteminfo);
+ else {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info);
+ break;
+ default:
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info);
+ }
+ }
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+ index, &size,
+ &frev, &crev, &data_offset)) {
+ if (adev->flags & AMD_IS_APU) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 11:
+ case 12:
+ mem_channel_number = igp_info->v11.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v11.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case 2:
+ switch (crev) {
+ case 1:
+ case 2:
+ mem_channel_number = igp_info->v21.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v21.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ case 3:
+ mem_channel_number = igp_info->v23.umachannelnumber;
+ if (!mem_channel_number)
+ mem_channel_number = 1;
+ mem_type = igp_info->v23.memorytype;
+ if (mem_type == LpDdr5MemType)
+ mem_channel_width = 32;
+ else
+ mem_channel_width = 64;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
+
+ if (frev == 4) {
+ switch (crev) {
+ case 0:
+ mem_channel_number = le32_to_cpu(umc_info->v40.channel_num);
+ mem_type = le32_to_cpu(umc_info->v40.vram_type);
+ mem_channel_width = le32_to_cpu(umc_info->v40.channel_width);
+ mem_vendor = RREG32(adev->bios_scratch_reg_offset + 4) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else
+ return -EINVAL;
+ break;
+ default:
+ vram_info = (union vram_info *)
+ (mode_info->atom_context->bios + data_offset);
+
+ module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
+ if (frev == 3) {
+ switch (crev) {
+ /* v30 */
+ case 0:
+ vram_module = (union vram_module *)vram_info->v30.vram_module;
+ mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ mem_type = vram_info->v30.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_info->v30.channel_num;
+ mem_channel_width = vram_info->v30.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * 16;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 2) {
+ switch (crev) {
+ /* v23 */
+ case 3:
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v24 */
+ case 4:
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v25 */
+ case 5:
+ if (module_id > vram_info->v25.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v25.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v11.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v11.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v11.channel_num;
+ mem_channel_width = vram_module->v11.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v26 */
+ case 6:
+ if (module_id > vram_info->v26.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v26.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ /* invalid frev */
+ return -EINVAL;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
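+
+/*
+ * Illustrative sketch (not part of this patch): GMC code typically calls
+ * this once during init and keeps only the fields it cares about.
+ *
+ *	int vram_width = 0, vram_type = 0, vram_vendor = 0;
+ *
+ *	if (!amdgpu_atomfirmware_get_vram_info(adev, &vram_width,
+ *					       &vram_type, &vram_vendor))
+ *		adev->gmc.vram_width = vram_width;
+ */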
+
+/*
+ * Return true if the vbios enabled ECC by default and the umc info table is
+ * available, or false if ECC is not enabled or the umc info table is not available
+ */
+bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union umc_info *umc_info;
+ u8 frev, crev;
+ bool mem_ecc_enabled = false;
+ u8 umc_config;
+ u32 umc_config1;
+ adev->ras_default_ecc_enabled = false;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ umc_info);
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+ index, &size, &frev, &crev, &data_offset)) {
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
+ if (frev == 3) {
+ switch (crev) {
+ case 1:
+ umc_config = le32_to_cpu(umc_info->v31.umc_config);
+ mem_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ case 2:
+ umc_config = le32_to_cpu(umc_info->v32.umc_config);
+ mem_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ case 3:
+ umc_config = le32_to_cpu(umc_info->v33.umc_config);
+ umc_config1 = le32_to_cpu(umc_info->v33.umc_config1);
+ mem_ecc_enabled =
+ ((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ||
+ (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ default:
+ /* unsupported crev */
+ return false;
+ }
+ } else if (frev == 4) {
+ switch (crev) {
+ case 0:
+ umc_config = le32_to_cpu(umc_info->v40.umc_config);
+ umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
+ mem_ecc_enabled =
+ (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
+ break;
+ default:
+ /* unsupported crev */
+ return false;
+ }
+ } else {
+ /* unsupported frev */
+ return false;
+ }
+ }
+
+ return mem_ecc_enabled;
+}
+
+/*
+ * Helper function to query sram ecc capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if vbios supports sram ecc or false if not
+ */
+bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_SRAM_ECC) ? true : false;
+}
+
+/*
+ * Helper function to query dynamic boot config capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if vbios supports dynamic boot config or false if not
+ */
+bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false;
+}
+
+/**
+ * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS
+ * @adev: amdgpu_device pointer
+ * @i2c_address: pointer to u8; if not NULL, will contain
+ * the RAS EEPROM address if the function returns true
+ *
+ * Return true if VBIOS supports RAS EEPROM address reporting,
+ * else return false. If true and @i2c_address is not NULL,
+ * will contain the RAS ROM address.
+ */
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
+ u8 *i2c_address)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union firmware_info *firmware_info;
+ u8 frev, crev;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
+ index, &size, &frev, &crev,
+ &data_offset)) {
+ /* support firmware_info 3.4 + */
+ if ((frev == 3 && crev >= 4) || (frev > 3)) {
+ firmware_info = (union firmware_info *)
+ (mode_info->atom_context->bios + data_offset);
+ /* The ras_rom_i2c_slave_addr should ideally
+ * be a 19-bit EEPROM address, which would be
+ * used as is by the driver; see top of
+ * amdgpu_eeprom.c.
+ *
+ * When this is the case, 0 is of course a
+ * valid RAS EEPROM address, in which case,
+ * we'll drop the first "if (firm...)" and only
+ * leave the check for the pointer.
+ *
+ * The reason this works right now is because
+ * ras_rom_i2c_slave_addr contains the EEPROM
+ * device type qualifier 1010b in the top 4
+ * bits.
+ */
+ if (firmware_info->v34.ras_rom_i2c_slave_addr) {
+ if (i2c_address)
+ *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
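+
+/*
+ * Illustrative sketch (not part of this patch): probing for a vbios
+ * provided RAS EEPROM address before falling back to a per-ASIC default.
+ * use_vbios_rom_addr() is a hypothetical helper.
+ *
+ *	u8 i2c_addr;
+ *
+ *	if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr))
+ *		use_vbios_rom_addr(i2c_addr);
+ */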
+
+
+union smu_info {
+ struct atom_smu_info_v3_1 v31;
+ struct atom_smu_info_v4_0 v40;
+};
+
+union gfx_info {
+ struct atom_gfx_info_v2_2 v22;
+ struct atom_gfx_info_v2_4 v24;
+ struct atom_gfx_info_v2_7 v27;
+ struct atom_gfx_info_v3_0 v30;
+};
+
+int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct amdgpu_pll *spll = &adev->clock.spll;
+ struct amdgpu_pll *mpll = &adev->clock.mpll;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ int ret = -EINVAL, index;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union firmware_info *firmware_info =
+ (union firmware_info *)(mode_info->atom_context->bios +
+ data_offset);
+
+ adev->clock.default_sclk =
+ le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz);
+ adev->clock.default_mclk =
+ le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz);
+
+ adev->pm.current_sclk = adev->clock.default_sclk;
+ adev->pm.current_mclk = adev->clock.default_mclk;
+
+ ret = 0;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ smu_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union smu_info *smu_info =
+ (union smu_info *)(mode_info->atom_context->bios +
+ data_offset);
+
+ /* system clock */
+ if (frev == 3)
+ spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz);
+ else if (frev == 4)
+ spll->reference_freq = le32_to_cpu(smu_info->v40.core_refclk_10khz);
+
+ spll->reference_div = 0;
+ spll->min_post_div = 1;
+ spll->max_post_div = 1;
+ spll->min_ref_div = 2;
+ spll->max_ref_div = 0xff;
+ spll->min_feedback_div = 4;
+ spll->max_feedback_div = 0xff;
+ spll->best_vco = 0;
+
+ ret = 0;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ umc_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union umc_info *umc_info =
+ (union umc_info *)(mode_info->atom_context->bios +
+ data_offset);
+
+ /* memory clock */
+ mpll->reference_freq = le32_to_cpu(umc_info->v31.mem_refclk_10khz);
+
+ mpll->reference_div = 0;
+ mpll->min_post_div = 1;
+ mpll->max_post_div = 1;
+ mpll->min_ref_div = 2;
+ mpll->max_ref_div = 0xff;
+ mpll->min_feedback_div = 4;
+ mpll->max_feedback_div = 0xff;
+ mpll->best_vco = 0;
+
+ ret = 0;
+ }
+
+ /* if the asic is Navi or newer, the rlc reference clock from the
+ * vbios gfx_info table is used for the system clock */
+ if (adev->asic_type >= CHIP_NAVI10) {
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ gfx_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+ if ((frev == 3) ||
+ (frev == 2 && crev == 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v30.golden_tsc_count_lower_refclk);
+ ret = 0;
+ } else if ((frev == 2) &&
+ (crev >= 2) &&
+ (crev != 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v22.rlc_gpu_timer_refclk);
+ ret = 0;
+ } else {
+ BUG();
+ }
+ }
+ }
+
+ return ret;
+}
+
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ gfx_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union gfx_info *gfx_info = (union gfx_info *)
+ (mode_info->atom_context->bios + data_offset);
+ if (frev == 2) {
+ switch (crev) {
+ case 4:
+ adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth =
+ le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ gfx_info->v24.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+ return 0;
+ case 7:
+ adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth = le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 3) {
+ switch (crev) {
+ case 0:
+ adev->gfx.config.max_shader_engines = gfx_info->v30.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v30.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v30.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v30.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v30.max_texture_channel_caches;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ }
+ return -EINVAL;
+}
+
+/*
+ * Helper function to query the two-stage memory training capability
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns true if two-stage memory training is supported, false otherwise
+ */
+bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev)
+{
+ u32 fw_cap;
+
+ fw_cap = adev->mode_info.firmware_flags;
+
+ return (fw_cap & ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING) ? true : false;
+}
+
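+/* Query the firmware-reserved framebuffer size from the firmware_info
+ * table. Returns the reserved size in bytes, 0 if the table cannot be
+ * parsed or the minor revision is unknown, or -EINVAL for an
+ * unsupported major revision.
+ */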
+int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ union firmware_info *firmware_info;
+ int index;
+ u16 data_offset, size;
+ u8 frev, crev;
+ int fw_reserved_fb_size;
+
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+
+ if (!amdgpu_atom_parse_data_header(ctx, index, &size,
+ &frev, &crev, &data_offset))
+ /* failed to parse the data header */
+ return 0;
+
+ firmware_info = (union firmware_info *)(ctx->bios + data_offset);
+
+ if (frev != 3)
+ return -EINVAL;
+
+ switch (crev) {
+ case 4:
+ fw_reserved_fb_size =
+ (firmware_info->v34.fw_reserved_size_in_kb << 10);
+ break;
+ case 5:
+ fw_reserved_fb_size =
+ (firmware_info->v35.fw_reserved_size_in_kb << 10);
+ break;
+ default:
+ fw_reserved_fb_size = 0;
+ break;
+ }
+
+ return fw_reserved_fb_size;
+}
+
+/*
+ * Helper function to execute the asic_init command table
+ *
+ * @adev: amdgpu_device pointer
+ * @fb_reset: flag indicating whether the framebuffer has been reset
+ *
+ * Returns 0 on success, a negative error code on failure
+ */
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ uint32_t bootup_sclk_in10khz, bootup_mclk_in10khz;
+ struct asic_init_ps_allocation_v2_1 asic_init_ps_v2_1;
+ int index;
+
+ if (!mode_info)
+ return -EINVAL;
+
+ ctx = mode_info->atom_context;
+ if (!ctx)
+ return -EINVAL;
+
+ /* query bootup sclk/mclk from firmware_info table */
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union firmware_info *firmware_info =
+ (union firmware_info *)(ctx->bios +
+ data_offset);
+
+ bootup_sclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz);
+ bootup_mclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz);
+ } else {
+ return -EINVAL;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1,
+ asic_init);
+ if (amdgpu_atom_parse_cmd_header(mode_info->atom_context, index, &frev, &crev)) {
+ if (frev == 2 && crev >= 1) {
+ memset(&asic_init_ps_v2_1, 0, sizeof(asic_init_ps_v2_1));
+ asic_init_ps_v2_1.param.engineparam.sclkfreqin10khz = bootup_sclk_in10khz;
+ asic_init_ps_v2_1.param.memparam.mclkfreqin10khz = bootup_mclk_in10khz;
+ asic_init_ps_v2_1.param.engineparam.engineflag = b3NORMAL_ENGINE_INIT;
+ if (!fb_reset)
+ asic_init_ps_v2_1.param.memparam.memflag = b3DRAM_SELF_REFRESH_EXIT;
+ else
+ asic_init_ps_v2_1.param.memparam.memflag = 0;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1,
+ sizeof(asic_init_ps_v2_1));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
new file mode 100644
index 000000000000..649b5530d8ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_ATOMFIRMWARE_H__
+#define __AMDGPU_ATOMFIRMWARE_H__
+
+#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t))
+
+uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_gpu_virtualization_supported(struct amdgpu_device *adev);
+void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
+ int *vram_width, int *vram_type, int *vram_vendor);
+int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t *i2c_address);
+bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev);
+bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
new file mode 100644
index 000000000000..3893e6fc2f03
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -0,0 +1,668 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2010 Red Hat Inc.
+ * Author : Dave Airlie <airlied@redhat.com>
+ *
+ * ATPX support for both Intel and ATI platforms
+ */
+#include <linux/vga_switcheroo.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "amdgpu.h"
+#include "amd_acpi.h"
+
+#define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0)
+
+struct amdgpu_px_quirk {
+ u32 chip_vendor;
+ u32 chip_device;
+ u32 subsys_vendor;
+ u32 subsys_device;
+ u32 px_quirk_flags;
+};
+
+struct amdgpu_atpx_functions {
+ bool px_params;
+ bool power_cntl;
+ bool disp_mux_cntl;
+ bool i2c_mux_cntl;
+ bool switch_start;
+ bool switch_end;
+ bool disp_connectors_mapping;
+ bool disp_detection_ports;
+};
+
+struct amdgpu_atpx {
+ acpi_handle handle;
+ struct amdgpu_atpx_functions functions;
+ bool is_hybrid;
+ bool dgpu_req_power_for_displays;
+};
+
+static struct amdgpu_atpx_priv {
+ bool atpx_detected;
+ bool bridge_pm_usable;
+ unsigned int quirks;
+ /* handles for the device and ATPX */
+ acpi_handle dhandle;
+ acpi_handle other_handle;
+ struct amdgpu_atpx atpx;
+} amdgpu_atpx_priv;
+
+struct atpx_verify_interface {
+ u16 size; /* structure size in bytes (includes size field) */
+ u16 version; /* version */
+ u32 function_bits; /* supported functions bit vector */
+} __packed;
+
+struct atpx_px_params {
+ u16 size; /* structure size in bytes (includes size field) */
+ u32 valid_flags; /* which flags are valid */
+ u32 flags; /* flags */
+} __packed;
+
+struct atpx_power_control {
+ u16 size;
+ u8 dgpu_state;
+} __packed;
+
+struct atpx_mux {
+ u16 size;
+ u16 mux;
+} __packed;
+
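+/* Simple accessors exposing the cached ATPX detection results to the
+ * rest of the driver.
+ */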
+bool amdgpu_has_atpx(void)
+{
+ return amdgpu_atpx_priv.atpx_detected;
+}
+
+bool amdgpu_has_atpx_dgpu_power_cntl(void)
+{
+ return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool amdgpu_is_atpx_hybrid(void)
+{
+ return amdgpu_atpx_priv.atpx.is_hybrid;
+}
+
+/**
+ * amdgpu_atpx_call - call an ATPX method
+ *
+ * @handle: acpi handle
+ * @function: the ATPX function to execute
+ * @params: ATPX function params
+ *
+ * Executes the requested ATPX function (all asics).
+ * Returns a pointer to the acpi output buffer.
+ */
+static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function,
+ struct acpi_buffer *params)
+{
+ acpi_status status;
+ union acpi_object atpx_arg_elements[2];
+ struct acpi_object_list atpx_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+ atpx_arg.count = 2;
+ atpx_arg.pointer = &atpx_arg_elements[0];
+
+ atpx_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ atpx_arg_elements[0].integer.value = function;
+
+ if (params) {
+ atpx_arg_elements[1].type = ACPI_TYPE_BUFFER;
+ atpx_arg_elements[1].buffer.length = params->length;
+ atpx_arg_elements[1].buffer.pointer = params->pointer;
+ } else {
+ /* We need a second fake parameter */
+ atpx_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ atpx_arg_elements[1].integer.value = 0;
+ }
+
+ status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer);
+
+ /* Fail only if calling the method fails and ATPX is supported */
+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ pr_err("failed to evaluate ATPX got %s\n",
+ acpi_format_exception(status));
+ kfree(buffer.pointer);
+ return NULL;
+ }
+
+ return buffer.pointer;
+}
+
+/**
+ * amdgpu_atpx_parse_functions - parse supported functions
+ *
+ * @f: supported functions struct
+ * @mask: supported functions mask from ATPX
+ *
+ * Use the supported functions mask from ATPX function
+ * ATPX_FUNCTION_VERIFY_INTERFACE to determine what functions
+ * are supported (all asics).
+ */
+static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mask)
+{
+ f->px_params = mask & ATPX_GET_PX_PARAMETERS_SUPPORTED;
+ f->power_cntl = mask & ATPX_POWER_CONTROL_SUPPORTED;
+ f->disp_mux_cntl = mask & ATPX_DISPLAY_MUX_CONTROL_SUPPORTED;
+ f->i2c_mux_cntl = mask & ATPX_I2C_MUX_CONTROL_SUPPORTED;
+ f->switch_start = mask & ATPX_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION_SUPPORTED;
+ f->switch_end = mask & ATPX_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION_SUPPORTED;
+ f->disp_connectors_mapping = mask & ATPX_GET_DISPLAY_CONNECTORS_MAPPING_SUPPORTED;
+ f->disp_detection_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
+}
+
+/**
+ * amdgpu_atpx_validate - validate ATPX functions
+ *
+ * @atpx: amdgpu atpx struct
+ *
+ * Validate that required functions are enabled (all asics).
+ * returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
+{
+ u32 valid_bits = 0;
+
+ if (atpx->functions.px_params) {
+ union acpi_object *info;
+ struct atpx_px_params output;
+ size_t size;
+
+ info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
+ if (!info)
+ return -EIO;
+
+ memset(&output, 0, sizeof(output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 10) {
+ pr_err("ATPX buffer is too small: %zu\n", size);
+ kfree(info);
+ return -EINVAL;
+ }
+ size = min(sizeof(output), size);
+
+ memcpy(&output, info->buffer.pointer, size);
+
+ valid_bits = output.flags & output.valid_flags;
+
+ kfree(info);
+ }
+
+ /* if separate mux flag is set, mux controls are required */
+ if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+ atpx->functions.i2c_mux_cntl = true;
+ atpx->functions.disp_mux_cntl = true;
+ }
+ /* if any outputs are muxed, mux controls are required */
+ if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+ ATPX_TV_SIGNAL_MUXED |
+ ATPX_DFP_SIGNAL_MUXED))
+ atpx->functions.disp_mux_cntl = true;
+
+
+ /* some BIOSes set these bits rather than flagging power_cntl as supported */
+ if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+ ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+ atpx->functions.power_cntl = true;
+
+ atpx->is_hybrid = false;
+ if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+ if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) {
+ pr_warn("ATPX Hybrid Graphics, forcing to ATPX\n");
+ atpx->functions.power_cntl = true;
+ atpx->is_hybrid = false;
+ } else {
+ pr_notice("ATPX Hybrid Graphics\n");
+ /*
+ * Disable legacy PM methods only when pcie port PM is usable,
+ * otherwise the device might fail to power off or power on.
+ */
+ atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
+ atpx->is_hybrid = true;
+ }
+ }
+
+ atpx->dgpu_req_power_for_displays = false;
+ if (valid_bits & ATPX_DGPU_REQ_POWER_FOR_DISPLAYS)
+ atpx->dgpu_req_power_for_displays = true;
+
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_verify_interface - verify ATPX
+ *
+ * @atpx: amdgpu atpx struct
+ *
+ * Execute the ATPX_FUNCTION_VERIFY_INTERFACE ATPX function
+ * to initialize ATPX and determine what features are supported
+ * (all asics).
+ * returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx)
+{
+ union acpi_object *info;
+ struct atpx_verify_interface output;
+ size_t size;
+ int err = 0;
+
+ info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_VERIFY_INTERFACE, NULL);
+ if (!info)
+ return -EIO;
+
+ memset(&output, 0, sizeof(output));
+
+ size = *(u16 *) info->buffer.pointer;
+ if (size < 8) {
+ pr_err("ATPX buffer is too small: %zu\n", size);
+ err = -EINVAL;
+ goto out;
+ }
+ size = min(sizeof(output), size);
+
+ memcpy(&output, info->buffer.pointer, size);
+
+ /* TODO: check version? */
+ pr_notice("ATPX version %u, functions 0x%08x\n",
+ output.version, output.function_bits);
+
+ amdgpu_atpx_parse_functions(&atpx->functions, output.function_bits);
+
+out:
+ kfree(info);
+ return err;
+}
+
+/**
+ * amdgpu_atpx_set_discrete_state - power up/down discrete GPU
+ *
+ * @atpx: atpx info struct
+ * @state: discrete GPU state (0 = power down, 1 = power up)
+ *
+ * Execute the ATPX_FUNCTION_POWER_CONTROL ATPX function to
+ * power down/up the discrete GPU (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_set_discrete_state(struct amdgpu_atpx *atpx, u8 state)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_power_control input;
+
+ if (atpx->functions.power_cntl) {
+ input.size = 3;
+ input.dgpu_state = state;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_POWER_CONTROL,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+
+ /* 200ms delay is required after off */
+ if (state == 0)
+ msleep(200);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switch_disp_mux - switch display mux
+ *
+ * @atpx: atpx info struct
+ * @mux_id: mux state (0 = integrated GPU, 1 = discrete GPU)
+ *
+ * Execute the ATPX_FUNCTION_DISPLAY_MUX_CONTROL ATPX function to
+ * switch the display mux between the discrete GPU and integrated GPU
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switch_disp_mux(struct amdgpu_atpx *atpx, u16 mux_id)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_mux input;
+
+ if (atpx->functions.disp_mux_cntl) {
+ input.size = 4;
+ input.mux = mux_id;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_DISPLAY_MUX_CONTROL,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switch_i2c_mux - switch i2c/hpd mux
+ *
+ * @atpx: atpx info struct
+ * @mux_id: mux state (0 = integrated GPU, 1 = discrete GPU)
+ *
+ * Execute the ATPX_FUNCTION_I2C_MUX_CONTROL ATPX function to
+ * switch the i2c/hpd mux between the discrete GPU and integrated GPU
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switch_i2c_mux(struct amdgpu_atpx *atpx, u16 mux_id)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_mux input;
+
+ if (atpx->functions.i2c_mux_cntl) {
+ input.size = 4;
+ input.mux = mux_id;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_I2C_MUX_CONTROL,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switch_start - notify the sbios of a GPU switch
+ *
+ * @atpx: atpx info struct
+ * @mux_id: mux state (0 = integrated GPU, 1 = discrete GPU)
+ *
+ * Execute the ATPX_FUNCTION_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION ATPX
+ * function to notify the sbios that a switch between the discrete GPU and
+ * integrated GPU has begun (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switch_start(struct amdgpu_atpx *atpx, u16 mux_id)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_mux input;
+
+ if (atpx->functions.switch_start) {
+ input.size = 4;
+ input.mux = mux_id;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switch_end - notify the sbios of a GPU switch
+ *
+ * @atpx: atpx info struct
+ * @mux_id: mux state (0 = integrated GPU, 1 = discrete GPU)
+ *
+ * Execute the ATPX_FUNCTION_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION ATPX
+ * function to notify the sbios that a switch between the discrete GPU and
+ * integrated GPU has ended (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switch_end(struct amdgpu_atpx *atpx, u16 mux_id)
+{
+ struct acpi_buffer params;
+ union acpi_object *info;
+ struct atpx_mux input;
+
+ if (atpx->functions.switch_end) {
+ input.size = 4;
+ input.mux = mux_id;
+ params.length = input.size;
+ params.pointer = &input;
+ info = amdgpu_atpx_call(atpx->handle,
+ ATPX_FUNCTION_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION,
+ &params);
+ if (!info)
+ return -EIO;
+ kfree(info);
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_switchto - switch to the requested GPU
+ *
+ * @id: GPU to switch to
+ *
+ * Execute the necessary ATPX functions to switch between the discrete GPU and
+ * integrated GPU (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_switchto(enum vga_switcheroo_client_id id)
+{
+ u16 gpu_id;
+
+ if (id == VGA_SWITCHEROO_IGD)
+ gpu_id = ATPX_INTEGRATED_GPU;
+ else
+ gpu_id = ATPX_DISCRETE_GPU;
+
+ amdgpu_atpx_switch_start(&amdgpu_atpx_priv.atpx, gpu_id);
+ amdgpu_atpx_switch_disp_mux(&amdgpu_atpx_priv.atpx, gpu_id);
+ amdgpu_atpx_switch_i2c_mux(&amdgpu_atpx_priv.atpx, gpu_id);
+ amdgpu_atpx_switch_end(&amdgpu_atpx_priv.atpx, gpu_id);
+
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_power_state - power down/up the requested GPU
+ *
+ * @id: GPU to power down/up
+ * @state: requested power state (0 = off, 1 = on)
+ *
+ * Execute the necessary ATPX function to power down/up the discrete GPU
+ * (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_power_state(enum vga_switcheroo_client_id id,
+ enum vga_switcheroo_state state)
+{
+ /* on the W500, ACPI can't change the Intel GPU state */
+ if (id == VGA_SWITCHEROO_IGD)
+ return 0;
+
+ amdgpu_atpx_set_discrete_state(&amdgpu_atpx_priv.atpx, state);
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_pci_probe_handle - look up the ATPX handle
+ *
+ * @pdev: pci device
+ *
+ * Look up the ATPX handles (all asics).
+ * Returns true if the handles are found, false if not.
+ */
+static bool amdgpu_atpx_pci_probe_handle(struct pci_dev *pdev)
+{
+ acpi_handle dhandle, atpx_handle;
+ acpi_status status;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ return false;
+
+ status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
+ if (ACPI_FAILURE(status)) {
+ amdgpu_atpx_priv.other_handle = dhandle;
+ return false;
+ }
+ amdgpu_atpx_priv.dhandle = dhandle;
+ amdgpu_atpx_priv.atpx.handle = atpx_handle;
+ return true;
+}
+
+/**
+ * amdgpu_atpx_init - verify the ATPX interface
+ *
+ * Verify the ATPX interface (all asics).
+ * Returns 0 on success, error on failure.
+ */
+static int amdgpu_atpx_init(void)
+{
+ int r;
+
+ /* set up the ATPX handle */
+ r = amdgpu_atpx_verify_interface(&amdgpu_atpx_priv.atpx);
+ if (r)
+ return r;
+
+ /* validate the atpx setup */
+ r = amdgpu_atpx_validate(&amdgpu_atpx_priv.atpx);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_atpx_get_client_id - get the client id
+ *
+ * @pdev: pci device
+ *
+ * look up whether we are the integrated or discrete GPU (all asics).
+ * Returns the client id.
+ */
+static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
+{
+ if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
+ return VGA_SWITCHEROO_IGD;
+ else
+ return VGA_SWITCHEROO_DIS;
+}
+
+static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
+ .switchto = amdgpu_atpx_switchto,
+ .power_state = amdgpu_atpx_power_state,
+ .get_client_id = amdgpu_atpx_get_client_id,
+};
+
+static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
+ /* HG _PR3 doesn't seem to work on this A+A weston board */
+ { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0, 0, 0, 0, 0 },
+};
+
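+/* Match the device against the PX quirk list and record any quirk
+ * flags (e.g. forcing ATPX power control) in amdgpu_atpx_priv.
+ */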
+static void amdgpu_atpx_get_quirks(struct pci_dev *pdev)
+{
+ const struct amdgpu_px_quirk *p = amdgpu_px_quirk_list;
+
+ /* Apply PX quirks */
+ while (p && p->chip_device != 0) {
+ if (pdev->vendor == p->chip_vendor &&
+ pdev->device == p->chip_device &&
+ pdev->subsystem_vendor == p->subsys_vendor &&
+ pdev->subsystem_device == p->subsys_device) {
+ amdgpu_atpx_priv.quirks |= p->px_quirk_flags;
+ break;
+ }
+ ++p;
+ }
+}
+
+/**
+ * amdgpu_atpx_detect - detect whether we have PX
+ *
+ * Check if we have a PX system (all asics).
+ * Returns true if we have a PX system, false if not.
+ */
+static bool amdgpu_atpx_detect(void)
+{
+ char acpi_method_name[255] = { 0 };
+ struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+ struct pci_dev *pdev = NULL;
+ bool has_atpx = false;
+ int vga_count = 0;
+ bool d3_supported = false;
+ struct pci_dev *parent_pdev;
+
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ vga_count++;
+
+ has_atpx |= amdgpu_atpx_pci_probe_handle(pdev);
+
+ parent_pdev = pci_upstream_bridge(pdev);
+ d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+ amdgpu_atpx_get_quirks(pdev);
+ }
+
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
+ vga_count++;
+
+ has_atpx |= amdgpu_atpx_pci_probe_handle(pdev);
+
+ parent_pdev = pci_upstream_bridge(pdev);
+ d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+ amdgpu_atpx_get_quirks(pdev);
+ }
+
+ if (has_atpx && vga_count == 2) {
+ acpi_get_name(amdgpu_atpx_priv.atpx.handle, ACPI_FULL_PATHNAME, &buffer);
+ pr_info("vga_switcheroo: detected switching method %s handle\n",
+ acpi_method_name);
+ amdgpu_atpx_priv.atpx_detected = true;
+ amdgpu_atpx_priv.bridge_pm_usable = d3_supported;
+ amdgpu_atpx_init();
+ return true;
+ }
+ return false;
+}
+
+/**
+ * amdgpu_register_atpx_handler - register with vga_switcheroo
+ *
+ * Register the PX callbacks with vga_switcheroo (all asics).
+ */
+void amdgpu_register_atpx_handler(void)
+{
+ bool r;
+ enum vga_switcheroo_handler_flags_t handler_flags = 0;
+
+ /* detect if we have any ATPX + 2 VGA in the system */
+ r = amdgpu_atpx_detect();
+ if (!r)
+ return;
+
+ vga_switcheroo_register_handler(&amdgpu_atpx_handler, handler_flags);
+}
+
+/**
+ * amdgpu_unregister_atpx_handler - unregister with vga_switcheroo
+ *
+ * Unregister the PX callbacks with vga_switcheroo (all asics).
+ */
+void amdgpu_unregister_atpx_handler(void)
+{
+ vga_switcheroo_unregister_handler();
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
new file mode 100644
index 000000000000..199693369c7c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Jerome Glisse
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+
+#define AMDGPU_BENCHMARK_ITERATIONS 1024
+#define AMDGPU_BENCHMARK_COMMON_MODES_N 17
+
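+/* Issue n SDMA copies of 'size' bytes from saddr to daddr, waiting for
+ * each copy fence, and return the total elapsed time in time_ms.
+ */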
+static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
+ uint64_t saddr, uint64_t daddr, int n, s64 *time_ms)
+{
+ ktime_t stime, etime;
+ struct dma_fence *fence;
+ int i, r;
+
+ stime = ktime_get();
+ for (i = 0; i < n; i++) {
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
+ false, false, 0);
+ if (r)
+ goto exit_do_move;
+ r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ if (r)
+ goto exit_do_move;
+ }
+
+exit_do_move:
+ etime = ktime_get();
+ *time_ms = ktime_ms_delta(etime, stime);
+
+ return r;
+}
+
+
+static void amdgpu_benchmark_log_results(struct amdgpu_device *adev,
+ int n, unsigned size,
+ s64 time_ms,
+ unsigned sdomain, unsigned ddomain,
+ char *kind)
+{
+ s64 throughput = (n * (size >> 10));
+
+ throughput = div64_s64(throughput, time_ms);
+
+ dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
+ " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
+ kind, n, size >> 10, sdomain, ddomain, time_ms,
+ throughput * 8, throughput);
+}
+
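+/* Allocate a source and a destination BO in the requested domains, run
+ * the SDMA copy benchmark between them and log the results; both BOs
+ * are freed before returning.
+ */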
+static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
+ unsigned sdomain, unsigned ddomain)
+{
+ struct amdgpu_bo *dobj = NULL;
+ struct amdgpu_bo *sobj = NULL;
+ uint64_t saddr, daddr;
+ s64 time_ms;
+ int r, n;
+
+ n = AMDGPU_BENCHMARK_ITERATIONS;
+
+ r = amdgpu_bo_create_kernel(adev, size,
+ PAGE_SIZE, sdomain,
+ &sobj,
+ &saddr,
+ NULL);
+ if (r)
+ goto out_cleanup;
+ r = amdgpu_bo_create_kernel(adev, size,
+ PAGE_SIZE, ddomain,
+ &dobj,
+ &daddr,
+ NULL);
+ if (r)
+ goto out_cleanup;
+
+ if (adev->mman.buffer_funcs) {
+ r = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n, &time_ms);
+ if (r)
+ goto out_cleanup;
+ else
+ amdgpu_benchmark_log_results(adev, n, size, time_ms,
+ sdomain, ddomain, "dma");
+ }
+
+out_cleanup:
+ /* Check the error value now; it can be overwritten during cleanup. */
+ if (r < 0)
+ dev_info(adev->dev, "Error while benchmarking BO move.\n");
+
+ if (sobj)
+ amdgpu_bo_free_kernel(&sobj, &saddr, NULL);
+ if (dobj)
+ amdgpu_bo_free_kernel(&dobj, &daddr, NULL);
+ return r;
+}
+
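+/* Entry point for the BO-move benchmarks: test_number selects either a
+ * fixed-size test or a buffer-size sweep (powers of two or common
+ * display mode sizes) between the GTT and VRAM domains.
+ */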
+int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
+{
+ int i, r;
+ static const int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = {
+ 640 * 480 * 4,
+ 720 * 480 * 4,
+ 800 * 600 * 4,
+ 848 * 480 * 4,
+ 1024 * 768 * 4,
+ 1152 * 768 * 4,
+ 1280 * 720 * 4,
+ 1280 * 800 * 4,
+ 1280 * 854 * 4,
+ 1280 * 960 * 4,
+ 1280 * 1024 * 4,
+ 1440 * 900 * 4,
+ 1400 * 1050 * 4,
+ 1680 * 1050 * 4,
+ 1600 * 1200 * 4,
+ 1920 * 1080 * 4,
+ 1920 * 1200 * 4
+ };
+
+ mutex_lock(&adev->benchmark_mutex);
+ switch (test_number) {
+ case 1:
+ dev_info(adev->dev,
+ "benchmark test: %d (simple test, VRAM to GTT and GTT to VRAM)\n",
+ test_number);
+ /* simple test, VRAM to GTT and GTT to VRAM */
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ break;
+ case 2:
+ dev_info(adev->dev,
+ "benchmark test: %d (simple test, VRAM to VRAM)\n",
+ test_number);
+ /* simple test, VRAM to VRAM */
+ r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ break;
+ case 3:
+ dev_info(adev->dev,
+ "benchmark test: %d (GTT to VRAM, buffer size sweep, powers of 2)\n",
+ test_number);
+ /* GTT to VRAM, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
+ break;
+ case 4:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to GTT, buffer size sweep, powers of 2)\n",
+ test_number);
+ /* VRAM to GTT, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ }
+ break;
+ case 5:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to VRAM, buffer size sweep, powers of 2)\n",
+ test_number);
+ /* VRAM to VRAM, buffer size sweep, powers of 2 */
+ for (i = 1; i <= 16384; i <<= 1) {
+ r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
+ break;
+ case 6:
+ dev_info(adev->dev,
+ "benchmark test: %d (GTT to VRAM, buffer size sweep, common modes)\n",
+ test_number);
+ /* GTT to VRAM, buffer size sweep, common modes */
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
+ break;
+ case 7:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to GTT, buffer size sweep, common modes)\n",
+ test_number);
+ /* VRAM to GTT, buffer size sweep, common modes */
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto done;
+ }
+ break;
+ case 8:
+ dev_info(adev->dev,
+ "benchmark test: %d (VRAM to VRAM, buffer size sweep, common modes)\n",
+ test_number);
+ /* VRAM to VRAM, buffer size sweep, common modes */
+ for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
+ r = amdgpu_benchmark_move(adev, common_modes[i],
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto done;
+ }
+ break;
+
+ default:
+ dev_info(adev->dev, "Unknown benchmark %d\n", test_number);
+ r = -EINVAL;
+ break;
+ }
+
+done:
+ mutex_unlock(&adev->benchmark_mutex);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
new file mode 100644
index 000000000000..35d04e69aec0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include "amdgpu.h"
+#include "atom.h"
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+/*
+ * BIOS.
+ */
+
+#define AMD_VBIOS_SIGNATURE " 761295520"
+#define AMD_VBIOS_SIGNATURE_OFFSET 0x30
+#define AMD_VBIOS_SIGNATURE_SIZE sizeof(AMD_VBIOS_SIGNATURE)
+#define AMD_VBIOS_SIGNATURE_END (AMD_VBIOS_SIGNATURE_OFFSET + AMD_VBIOS_SIGNATURE_SIZE)
+#define AMD_IS_VALID_VBIOS(p) ((p)[0] == 0x55 && (p)[1] == 0xAA)
+#define AMD_VBIOS_LENGTH(p) ((p)[2] << 9)
+
+/* Check whether the current BIOS is an ATOM BIOS.
+ * Returns true if it is, false otherwise.
+ */
+static bool check_atom_bios(struct amdgpu_device *adev, size_t size)
+{
+ uint16_t tmp, bios_header_start;
+ uint8_t *bios = adev->bios;
+
+ if (!bios || size < 0x49) {
+ dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n");
+ return false;
+ }
+
+ if (!AMD_IS_VALID_VBIOS(bios)) {
+ dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0],
+ bios[1]);
+ return false;
+ }
+
+ bios_header_start = bios[0x48] | (bios[0x49] << 8);
+ if (!bios_header_start) {
+ dev_dbg(adev->dev, "Can't locate VBIOS header\n");
+ return false;
+ }
+
+ tmp = bios_header_start + 4;
+ if (size < tmp) {
+ dev_dbg(adev->dev, "VBIOS header is broken\n");
+ return false;
+ }
+
+ if (!memcmp(bios + tmp, "ATOM", 4) ||
+ !memcmp(bios + tmp, "MOTA", 4)) {
+ dev_dbg(adev->dev, "ATOMBIOS detected\n");
+ return true;
+ }
+
+ return false;
+}
+
+void amdgpu_bios_release(struct amdgpu_device *adev)
+{
+ kfree(adev->bios);
+ adev->bios = NULL;
+ adev->bios_size = 0;
+}
+
+/* If you boot an IGP board with a discrete card as the primary,
+ * the IGP rom is not accessible via the rom bar as the IGP rom is
+ * part of the system bios. On boot, the system bios puts a
+ * copy of the igp rom at the start of vram if a discrete card is
+ * present.
+ * For SR-IOV, if dynamic critical region is not enabled,
+ * the vbios image is also put at the start of VRAM in the VF.
+ */
+static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
+{
+ uint8_t __iomem *bios = NULL;
+ resource_size_t vram_base;
+ u32 size = 256U * 1024U; /* ??? */
+
+ if (!(adev->flags & AMD_IS_APU))
+ if (amdgpu_device_need_post(adev))
+ return false;
+
+ /* FB BAR not enabled */
+ if (pci_resource_len(adev->pdev, 0) == 0)
+ return false;
+
+ adev->bios = NULL;
+ vram_base = pci_resource_start(adev->pdev, 0);
+
+ adev->bios = kmalloc(size, GFP_KERNEL);
+ if (!adev->bios)
+ return false;
+
+ /* For SR-IOV with the dynamic critical region enabled,
+ * the vbios image is placed at a dynamic offset in VF VRAM.
+ * If the dynamic critical region is disabled, follow the existing logic as on bare metal.
+ */
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID, adev->bios, &size)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+ } else {
+ bios = ioremap_wc(vram_base, size);
+ if (!bios) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+
+ memcpy_fromio(adev->bios, bios, size);
+ iounmap(bios);
+ }
+
+ adev->bios_size = size;
+
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+
+ return true;
+}
+
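+/* Read the vbios image through the PCI ROM BAR (pci_map_rom) and
+ * validate that it is an ATOM BIOS.
+ */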
+bool amdgpu_read_bios(struct amdgpu_device *adev)
+{
+ uint8_t __iomem *bios;
+ size_t size;
+
+ adev->bios = NULL;
+ /* XXX: some cards may return 0 for rom size? ddx has a workaround */
+ bios = pci_map_rom(adev->pdev, &size);
+ if (!bios)
+ return false;
+
+ adev->bios = kzalloc(size, GFP_KERNEL);
+ if (adev->bios == NULL) {
+ pci_unmap_rom(adev->pdev, bios);
+ return false;
+ }
+ adev->bios_size = size;
+ memcpy_fromio(adev->bios, bios, size);
+ pci_unmap_rom(adev->pdev, bios);
+
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+
+ return true;
+}
+
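+/* Read the vbios directly from the ROM chip via the ASIC callback:
+ * validate the signature first, then read the complete image.
+ */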
+static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
+{
+ u8 header[AMD_VBIOS_SIGNATURE_END+1] = {0};
+ int len;
+
+ if (!adev->asic_funcs || !adev->asic_funcs->read_bios_from_rom)
+ return false;
+
+ /* validate VBIOS signature */
+ if (!amdgpu_asic_read_bios_from_rom(adev, &header[0], sizeof(header)))
+ return false;
+ header[AMD_VBIOS_SIGNATURE_END] = 0;
+
+ if ((!AMD_IS_VALID_VBIOS(header)) ||
+ memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET],
+ AMD_VBIOS_SIGNATURE,
+ strlen(AMD_VBIOS_SIGNATURE)) != 0)
+ return false;
+
+ /* valid vbios, go on */
+ len = AMD_VBIOS_LENGTH(header);
+ len = ALIGN(len, 4);
+ adev->bios = kmalloc(len, GFP_KERNEL);
+ if (!adev->bios) {
+ DRM_ERROR("no memory to allocate for BIOS\n");
+ return false;
+ }
+ adev->bios_size = len;
+
+ /* read complete BIOS */
+ amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
+
+ if (!check_atom_bios(adev, len)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+
+ return true;
+}
+
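+/* Copy the vbios image from the platform ROM resource advertised by
+ * the PCI core (pdev->rom / pdev->romlen).
+ */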
+static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
+{
+ phys_addr_t rom = adev->pdev->rom;
+ size_t romlen = adev->pdev->romlen;
+ void __iomem *bios;
+
+ adev->bios = NULL;
+
+ if (!rom || romlen == 0)
+ return false;
+
+ adev->bios = kzalloc(romlen, GFP_KERNEL);
+ if (!adev->bios)
+ return false;
+
+ bios = ioremap(rom, romlen);
+ if (!bios)
+ goto free_bios;
+
+ memcpy_fromio(adev->bios, bios, romlen);
+ iounmap(bios);
+
+ if (!check_atom_bios(adev, romlen))
+ goto free_bios;
+
+ adev->bios_size = romlen;
+
+ return true;
+free_bios:
+ amdgpu_bios_release(adev);
+
+ return false;
+}
+
+#ifdef CONFIG_ACPI
+/* ATRM is used to get the BIOS on the discrete cards in
+ * dual-gpu systems.
+ */
+/* retrieve the ROM in 4k blocks */
+#define ATRM_BIOS_PAGE 4096
+/**
+ * amdgpu_atrm_call - fetch a chunk of the vbios
+ *
+ * @atrm_handle: acpi ATRM handle
+ * @bios: vbios image pointer
+ * @offset: offset of vbios image data to fetch
+ * @len: length of vbios image data to fetch
+ *
+ * Executes ATRM to fetch a chunk of the discrete
+ * vbios image on PX systems (all asics).
+ * Returns the length of the buffer fetched.
+ */
+static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
+ int offset, int len)
+{
+ acpi_status status;
+ union acpi_object atrm_arg_elements[2], *obj;
+ struct acpi_object_list atrm_arg;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
+
+ atrm_arg.count = 2;
+ atrm_arg.pointer = &atrm_arg_elements[0];
+
+ atrm_arg_elements[0].type = ACPI_TYPE_INTEGER;
+ atrm_arg_elements[0].integer.value = offset;
+
+ atrm_arg_elements[1].type = ACPI_TYPE_INTEGER;
+ atrm_arg_elements[1].integer.value = len;
+
+ status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
+ if (ACPI_FAILURE(status)) {
+ DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
+ return -ENODEV;
+ }
+
+ obj = (union acpi_object *)buffer.pointer;
+ memcpy(bios+offset, obj->buffer.pointer, obj->buffer.length);
+ len = obj->buffer.length;
+ kfree(buffer.pointer);
+ return len;
+}
+
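+/* Locate a display device that exposes the ATRM ACPI method and fetch
+ * the discrete vbios from it in 4K chunks.
+ */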
+static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
+{
+ int ret;
+ int size = 256 * 1024;
+ int i;
+ struct pci_dev *pdev = NULL;
+ acpi_handle dhandle, atrm_handle;
+ acpi_status status;
+ bool found = false;
+
+ /* ATRM is for on-platform devices only */
+ if (dev_is_removable(&adev->pdev->dev))
+ return false;
+
+ while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) {
+ if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) &&
+ (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8))
+ continue;
+
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (!dhandle)
+ continue;
+
+ status = acpi_get_handle(dhandle, "ATRM", &atrm_handle);
+ if (ACPI_SUCCESS(status)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ return false;
+ pci_dev_put(pdev);
+
+ adev->bios = kmalloc(size, GFP_KERNEL);
+ if (!adev->bios) {
+ dev_err(adev->dev, "Unable to allocate bios\n");
+ return false;
+ }
+
+ for (i = 0; i < size / ATRM_BIOS_PAGE; i++) {
+ ret = amdgpu_atrm_call(atrm_handle,
+ adev->bios,
+ (i * ATRM_BIOS_PAGE),
+ ATRM_BIOS_PAGE);
+ if (ret < ATRM_BIOS_PAGE)
+ break;
+ }
+
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+ adev->bios_size = size;
+ return true;
+}
+#else
+static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
+{
+ return false;
+}
+#endif
+
+static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
+{
+ return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
+ false : amdgpu_asic_read_disabled_bios(adev);
+}
+
+#ifdef CONFIG_ACPI
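+/* Fetch the vbios image from the ACPI VFCT table by matching the PCI
+ * bus/device/function and vendor/device IDs of this adapter.
+ */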
+static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
+{
+ struct acpi_table_header *hdr;
+ acpi_size tbl_size;
+ UEFI_ACPI_VFCT *vfct;
+ unsigned int offset;
+
+ if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
+ return false;
+ tbl_size = hdr->length;
+ if (tbl_size < sizeof(UEFI_ACPI_VFCT)) {
+ dev_info(adev->dev, "ACPI VFCT table present but broken (too short #1),skipping\n");
+ return false;
+ }
+
+ vfct = (UEFI_ACPI_VFCT *)hdr;
+ offset = vfct->VBIOSImageOffset;
+
+ while (offset < tbl_size) {
+ GOP_VBIOS_CONTENT *vbios = (GOP_VBIOS_CONTENT *)((char *)hdr + offset);
+ VFCT_IMAGE_HEADER *vhdr = &vbios->VbiosHeader;
+
+ offset += sizeof(VFCT_IMAGE_HEADER);
+ if (offset > tbl_size) {
+ dev_info(adev->dev, "ACPI VFCT image header truncated,skipping\n");
+ return false;
+ }
+
+ offset += vhdr->ImageLength;
+ if (offset > tbl_size) {
+ dev_info(adev->dev, "ACPI VFCT image truncated,skipping\n");
+ return false;
+ }
+
+ if (vhdr->ImageLength &&
+ vhdr->PCIBus == adev->pdev->bus->number &&
+ vhdr->PCIDevice == PCI_SLOT(adev->pdev->devfn) &&
+ vhdr->PCIFunction == PCI_FUNC(adev->pdev->devfn) &&
+ vhdr->VendorID == adev->pdev->vendor &&
+ vhdr->DeviceID == adev->pdev->device) {
+ adev->bios = kmemdup(&vbios->VbiosContent,
+ vhdr->ImageLength,
+ GFP_KERNEL);
+
+ if (!check_atom_bios(adev, vhdr->ImageLength)) {
+ amdgpu_bios_release(adev);
+ return false;
+ }
+ adev->bios_size = vhdr->ImageLength;
+ return true;
+ }
+ }
+
+ dev_info(adev->dev, "ACPI VFCT table present but broken (too short #2),skipping\n");
+ return false;
+}
+#else
+static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
+{
+ return false;
+}
+#endif
+
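+/* Try the vbios sources valid for APUs in priority order: VFCT, the
+ * VRAM copy, the ROM BAR and finally the platform ROM.
+ */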
+static bool amdgpu_get_bios_apu(struct amdgpu_device *adev)
+{
+ if (amdgpu_acpi_vfct_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios_from_vram(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
+ return false;
+
+success:
+ return true;
+}
+
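+/* Returns true if the ROM resource is a shadow copy set up by the
+ * platform, in which case the ROM BAR is tried before the platform ROM.
+ */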
+static bool amdgpu_prefer_rom_resource(struct amdgpu_device *adev)
+{
+ struct resource *res = &adev->pdev->resource[PCI_ROM_RESOURCE];
+
+ return (res->flags & IORESOURCE_ROM_SHADOW);
+}
+
+static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
+{
+ if (amdgpu_atrm_get_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ATRM\n");
+ goto success;
+ }
+
+ if (amdgpu_acpi_vfct_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
+ goto success;
+ }
+
+ /* this is required for SR-IOV */
+ if (amdgpu_read_bios_from_vram(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_prefer_rom_resource(adev)) {
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ } else {
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
+ }
+
+ if (amdgpu_read_bios_from_rom(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM\n");
+ goto success;
+ }
+
+ if (amdgpu_read_disabled_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from disabled ROM BAR\n");
+ goto success;
+ }
+
+ dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
+ return false;
+
+success:
+ return true;
+}
+
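+/* Top-level vbios fetch: pick the APU or dGPU search order and, on
+ * success, flag whether the image uses the atomfirmware (VEGA10+) layout.
+ */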
+bool amdgpu_get_bios(struct amdgpu_device *adev)
+{
+ bool found;
+
+ if (adev->flags & AMD_IS_APU)
+ found = amdgpu_get_bios_apu(adev);
+ else
+ found = amdgpu_get_bios_dgpu(adev);
+
+ if (found)
+ adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
+
+ return found;
+}
+
+/* helper function for soc15 and onwards to read bios from rom */
+bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ u32 *dw_ptr;
+ u32 i, length_dw;
+ u32 rom_offset;
+ u32 rom_index_offset;
+ u32 rom_data_offset;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (!adev->smuio.funcs ||
+ !adev->smuio.funcs->get_rom_index_offset ||
+ !adev->smuio.funcs->get_rom_data_offset)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+
+ rom_index_offset =
+ adev->smuio.funcs->get_rom_index_offset(adev);
+ rom_data_offset =
+ adev->smuio.funcs->get_rom_data_offset(adev);
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_rom_offset) {
+ rom_offset = adev->nbio.funcs->get_rom_offset(adev);
+ rom_offset = rom_offset << 17;
+ } else {
+ rom_offset = 0;
+ }
+
+ /* set rom index to rom_offset */
+ WREG32(rom_index_offset, rom_offset);
+ /* read out the rom data */
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(rom_data_offset);
+
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
new file mode 100644
index 000000000000..66fb37b64388
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <deathsimple@vodafone.de>
+ */
+
+#include <linux/sort.h>
+#include <linux/uaccess.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+
+#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
+#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
+
+static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
+{
+ struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
+ rhead);
+ mutex_destroy(&list->bo_list_mutex);
+ kvfree(list);
+}
+
+static void amdgpu_bo_list_free(struct kref *ref)
+{
+ struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+ refcount);
+ struct amdgpu_bo_list_entry *e;
+
+ amdgpu_bo_list_for_each_entry(e, list)
+ amdgpu_bo_unref(&e->bo);
+ call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
+}
+
+static int amdgpu_bo_list_entry_cmp(const void *_a, const void *_b)
+{
+ const struct amdgpu_bo_list_entry *a = _a, *b = _b;
+
+ if (a->priority > b->priority)
+ return 1;
+ if (a->priority < b->priority)
+ return -1;
+ return 0;
+}
+
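+/* Build a bo_list from the userspace handle array: userptr BOs are
+ * grouped at the end of the entry array, the remaining entries are
+ * sorted by priority, and GDS/GWS/OA BOs are remembered separately.
+ */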
+int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+ size_t num_entries, struct amdgpu_bo_list **result)
+{
+ unsigned last_entry = 0, first_userptr = num_entries;
+ struct amdgpu_bo_list_entry *array;
+ struct amdgpu_bo_list *list;
+ uint64_t total_size = 0;
+ unsigned i;
+ int r;
+
+ list = kvzalloc(struct_size(list, entries, num_entries), GFP_KERNEL);
+ if (!list)
+ return -ENOMEM;
+
+ kref_init(&list->refcount);
+
+ list->num_entries = num_entries;
+ array = list->entries;
+
+ for (i = 0; i < num_entries; ++i) {
+ struct amdgpu_bo_list_entry *entry;
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
+ struct mm_struct *usermm;
+
+ gobj = drm_gem_object_lookup(filp, info[i].bo_handle);
+ if (!gobj) {
+ r = -ENOENT;
+ goto error_free;
+ }
+
+ bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ drm_gem_object_put(gobj);
+
+ usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
+ if (usermm) {
+ if (usermm != current->mm) {
+ amdgpu_bo_unref(&bo);
+ r = -EPERM;
+ goto error_free;
+ }
+ entry = &array[--first_userptr];
+ } else {
+ entry = &array[last_entry++];
+ }
+
+ entry->priority = min(info[i].bo_priority,
+ AMDGPU_BO_LIST_MAX_PRIORITY);
+ entry->bo = bo;
+
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
+ list->gds_obj = bo;
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
+ list->gws_obj = bo;
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
+ list->oa_obj = bo;
+
+ total_size += amdgpu_bo_size(bo);
+ trace_amdgpu_bo_list_set(list, bo);
+ }
+
+ list->first_userptr = first_userptr;
+ sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry),
+ amdgpu_bo_list_entry_cmp, NULL);
+
+ trace_amdgpu_cs_bo_status(list->num_entries, total_size);
+
+ mutex_init(&list->bo_list_mutex);
+ *result = list;
+ return 0;
+
+error_free:
+ for (i = 0; i < last_entry; ++i)
+ amdgpu_bo_unref(&array[i].bo);
+ for (i = first_userptr; i < num_entries; ++i)
+ amdgpu_bo_unref(&array[i].bo);
+ kvfree(list);
+ return r;
+
+}
+
+static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
+{
+ struct amdgpu_bo_list *list;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ list = idr_remove(&fpriv->bo_list_handles, id);
+ mutex_unlock(&fpriv->bo_list_lock);
+ if (list)
+ kref_put(&list->refcount, amdgpu_bo_list_free);
+}
+
+int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
+ struct amdgpu_bo_list **result)
+{
+ rcu_read_lock();
+ *result = idr_find(&fpriv->bo_list_handles, id);
+
+ if (*result && kref_get_unless_zero(&(*result)->refcount)) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ rcu_read_unlock();
+ *result = NULL;
+ return -ENOENT;
+}
+
+void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
+{
+ kref_put(&list->refcount, amdgpu_bo_list_free);
+}
+
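+/* Copy the userspace BO-list entry array into a kernel buffer, handling
+ * the case where the userspace entry size differs from the kernel one.
+ */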
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ struct drm_amdgpu_bo_list_entry **info_param)
+{
+ const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
+ const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+ const uint32_t bo_info_size = in->bo_info_size;
+ const uint32_t bo_number = in->bo_number;
+ struct drm_amdgpu_bo_list_entry *info;
+
+ /* copy the handle array from userspace to a kernel buffer */
+ if (likely(info_size == bo_info_size)) {
+ info = vmemdup_array_user(uptr, bo_number, info_size);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+ } else {
+ const uint32_t bytes = min(bo_info_size, info_size);
+ unsigned i;
+
+ info = kvmalloc_array(bo_number, info_size, GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ memset(info, 0, bo_number * info_size);
+ for (i = 0; i < bo_number; ++i, uptr += bo_info_size) {
+ if (copy_from_user(&info[i], uptr, bytes)) {
+ kvfree(info);
+ return -EFAULT;
+ }
+ }
+ }
+
+ *info_param = info;
+ return 0;
+}
+
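+/* AMDGPU_BO_LIST ioctl: create, destroy or update a per-file BO list
+ * identified by an idr handle.
+ */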
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ union drm_amdgpu_bo_list *args = data;
+ uint32_t handle = args->in.list_handle;
+ struct drm_amdgpu_bo_list_entry *info = NULL;
+ struct amdgpu_bo_list *list, *old;
+ int r;
+
+ r = amdgpu_bo_create_list_entry_array(&args->in, &info);
+ if (r)
+ return r;
+
+ switch (args->in.operation) {
+ case AMDGPU_BO_LIST_OP_CREATE:
+ r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
+ &list);
+ if (r)
+ goto error_free;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+ mutex_unlock(&fpriv->bo_list_lock);
+ if (r < 0) {
+ goto error_put_list;
+ }
+
+ handle = r;
+ break;
+
+ case AMDGPU_BO_LIST_OP_DESTROY:
+ amdgpu_bo_list_destroy(fpriv, handle);
+ handle = 0;
+ break;
+
+ case AMDGPU_BO_LIST_OP_UPDATE:
+ r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
+ &list);
+ if (r)
+ goto error_free;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ old = idr_replace(&fpriv->bo_list_handles, list, handle);
+ mutex_unlock(&fpriv->bo_list_lock);
+
+ if (IS_ERR(old)) {
+ r = PTR_ERR(old);
+ goto error_put_list;
+ }
+
+ amdgpu_bo_list_put(old);
+ break;
+
+ default:
+ r = -EINVAL;
+ goto error_free;
+ }
+
+ memset(args, 0, sizeof(*args));
+ args->out.list_handle = handle;
+ kvfree(info);
+
+ return 0;
+
+error_put_list:
+ amdgpu_bo_list_put(list);
+
+error_free:
+ kvfree(info);
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
new file mode 100644
index 000000000000..2b5e7c46a39d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_BO_LIST_H__
+#define __AMDGPU_BO_LIST_H__
+
+#include <drm/amdgpu_drm.h>
+
+struct hmm_range;
+
+struct drm_file;
+
+struct amdgpu_device;
+struct amdgpu_bo;
+struct amdgpu_bo_va;
+struct amdgpu_fpriv;
+
+struct amdgpu_bo_list_entry {
+ struct amdgpu_bo *bo;
+ struct amdgpu_bo_va *bo_va;
+ uint32_t priority;
+ struct hmm_range *range;
+ bool user_invalidated;
+};
+
+struct amdgpu_bo_list {
+ struct rcu_head rhead;
+ struct kref refcount;
+ struct amdgpu_bo *gds_obj;
+ struct amdgpu_bo *gws_obj;
+ struct amdgpu_bo *oa_obj;
+ unsigned first_userptr;
+ unsigned num_entries;
+
+ /* Protect access during command submission.
+ */
+ struct mutex bo_list_mutex;
+
+ struct amdgpu_bo_list_entry entries[] __counted_by(num_entries);
+};
+
+int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
+ struct amdgpu_bo_list **result);
+void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ struct drm_amdgpu_bo_list_entry **info_param);
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+ size_t num_entries,
+ struct amdgpu_bo_list **list);
+
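+/*
+ * Iterate over all entries of a BO list, or only over the userptr entries
+ * starting at first_userptr. Illustrative sketch only (handle_bo() is a
+ * made-up placeholder, not part of this patch):
+ *
+ * struct amdgpu_bo_list_entry *e;
+ *
+ * amdgpu_bo_list_for_each_entry(e, list)
+ * handle_bo(e->bo);
+ */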
+#define amdgpu_bo_list_for_each_entry(e, list) \
+ for (e = list->entries; \
+ e != &list->entries[list->num_entries]; \
+ ++e)
+
+#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
+ for (e = &list->entries[list->first_userptr]; \
+ e != &list->entries[list->num_entries]; \
+ ++e)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
new file mode 100644
index 000000000000..004a6a9d6b9f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include <linux/firmware.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_ucode.h"
+
+struct amdgpu_cgs_device {
+ struct cgs_device base;
+ struct amdgpu_device *adev;
+};
+
+#define CGS_FUNC_ADEV \
+ struct amdgpu_device *adev = \
+ ((struct amdgpu_cgs_device *)cgs_device)->adev
+
+
+static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned int offset)
+{
+ CGS_FUNC_ADEV;
+ return RREG32(offset);
+}
+
+static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned int offset,
+ uint32_t value)
+{
+ CGS_FUNC_ADEV;
+ WREG32(offset, value);
+}
+
+static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
+ enum cgs_ind_reg space,
+ unsigned int index)
+{
+ CGS_FUNC_ADEV;
+ switch (space) {
+ case CGS_IND_REG__PCIE:
+ return RREG32_PCIE(index);
+ case CGS_IND_REG__SMC:
+ return RREG32_SMC(index);
+ case CGS_IND_REG__UVD_CTX:
+ return RREG32_UVD_CTX(index);
+ case CGS_IND_REG__DIDT:
+ return RREG32_DIDT(index);
+ case CGS_IND_REG_GC_CAC:
+ return RREG32_GC_CAC(index);
+ case CGS_IND_REG_SE_CAC:
+ return RREG32_SE_CAC(index);
+ case CGS_IND_REG__AUDIO_ENDPT:
+ DRM_ERROR("audio endpt register access not implemented.\n");
+ return 0;
+ default:
+ BUG();
+ }
+ WARN(1, "Invalid indirect register space");
+ return 0;
+}
+
+static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
+ enum cgs_ind_reg space,
+ unsigned int index, uint32_t value)
+{
+ CGS_FUNC_ADEV;
+ switch (space) {
+ case CGS_IND_REG__PCIE:
+ return WREG32_PCIE(index, value);
+ case CGS_IND_REG__SMC:
+ return WREG32_SMC(index, value);
+ case CGS_IND_REG__UVD_CTX:
+ return WREG32_UVD_CTX(index, value);
+ case CGS_IND_REG__DIDT:
+ return WREG32_DIDT(index, value);
+ case CGS_IND_REG_GC_CAC:
+ return WREG32_GC_CAC(index, value);
+ case CGS_IND_REG_SE_CAC:
+ return WREG32_SE_CAC(index, value);
+ case CGS_IND_REG__AUDIO_ENDPT:
+ DRM_ERROR("audio endpt register access not implemented.\n");
+ return;
+ default:
+ BUG();
+ }
+ WARN(1, "Invalid indirect register space");
+}
+
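+/**
+ * fw_type_convert - map a CGS ucode id to the corresponding AMDGPU ucode id
+ * @cgs_device: CGS device handle
+ * @fw_type: CGS_UCODE_ID_* value to convert
+ *
+ * Return: matching AMDGPU_UCODE_ID_* value, or AMDGPU_UCODE_ID_MAXIMUM if
+ * the firmware type is not supported.
+ */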
+static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
+{
+ CGS_FUNC_ADEV;
+ enum AMDGPU_UCODE_ID result = AMDGPU_UCODE_ID_MAXIMUM;
+
+ switch (fw_type) {
+ case CGS_UCODE_ID_SDMA0:
+ result = AMDGPU_UCODE_ID_SDMA0;
+ break;
+ case CGS_UCODE_ID_SDMA1:
+ result = AMDGPU_UCODE_ID_SDMA1;
+ break;
+ case CGS_UCODE_ID_CP_CE:
+ result = AMDGPU_UCODE_ID_CP_CE;
+ break;
+ case CGS_UCODE_ID_CP_PFP:
+ result = AMDGPU_UCODE_ID_CP_PFP;
+ break;
+ case CGS_UCODE_ID_CP_ME:
+ result = AMDGPU_UCODE_ID_CP_ME;
+ break;
+ case CGS_UCODE_ID_CP_MEC:
+ case CGS_UCODE_ID_CP_MEC_JT1:
+ result = AMDGPU_UCODE_ID_CP_MEC1;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT2:
+ /* For VI, JT2 should be the same as JT1, because:
+ * 1. MEC2 and MEC1 use exactly the same FW.
+ * 2. JT2 is not patched, but JT1 is.
+ */
+ if (adev->asic_type >= CHIP_TOPAZ)
+ result = AMDGPU_UCODE_ID_CP_MEC1;
+ else
+ result = AMDGPU_UCODE_ID_CP_MEC2;
+ break;
+ case CGS_UCODE_ID_RLC_G:
+ result = AMDGPU_UCODE_ID_RLC_G;
+ break;
+ case CGS_UCODE_ID_STORAGE:
+ result = AMDGPU_UCODE_ID_STORAGE;
+ break;
+ default:
+ DRM_ERROR("Firmware type not supported\n");
+ }
+ return result;
+}
+
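+/**
+ * amdgpu_get_firmware_version - query the loaded firmware version
+ * @cgs_device: CGS device handle
+ * @type: CGS_UCODE_ID_* value identifying the firmware
+ *
+ * Return: firmware version, or 0 if the type has no version information.
+ */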
+static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
+ enum cgs_ucode_id type)
+{
+ CGS_FUNC_ADEV;
+ uint16_t fw_version = 0;
+
+ switch (type) {
+ case CGS_UCODE_ID_SDMA0:
+ fw_version = adev->sdma.instance[0].fw_version;
+ break;
+ case CGS_UCODE_ID_SDMA1:
+ fw_version = adev->sdma.instance[1].fw_version;
+ break;
+ case CGS_UCODE_ID_CP_CE:
+ fw_version = adev->gfx.ce_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_PFP:
+ fw_version = adev->gfx.pfp_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_ME:
+ fw_version = adev->gfx.me_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT1:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_CP_MEC_JT2:
+ fw_version = adev->gfx.mec_fw_version;
+ break;
+ case CGS_UCODE_ID_RLC_G:
+ fw_version = adev->gfx.rlc_fw_version;
+ break;
+ case CGS_UCODE_ID_STORAGE:
+ break;
+ default:
+ DRM_ERROR("firmware type %d do not have version\n", type);
+ break;
+ }
+ return fw_version;
+}
+
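+/**
+ * amdgpu_cgs_get_firmware_info - fill in firmware details for a CGS client
+ * @cgs_device: CGS device handle
+ * @type: CGS_UCODE_ID_* value identifying the firmware
+ * @info: filled with address, size and version information
+ *
+ * For non-SMC firmware the information is taken from the already loaded
+ * ucode; for SMC firmware the matching image is requested from the
+ * filesystem first, based on the ASIC type and revision.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */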
+static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
+ enum cgs_ucode_id type,
+ struct cgs_firmware_info *info)
+{
+ CGS_FUNC_ADEV;
+
+ if (type != CGS_UCODE_ID_SMU && type != CGS_UCODE_ID_SMU_SK) {
+ uint64_t gpu_addr;
+ uint32_t data_size;
+ const struct gfx_firmware_header_v1_0 *header;
+ enum AMDGPU_UCODE_ID id;
+ struct amdgpu_firmware_info *ucode;
+
+ id = fw_type_convert(cgs_device, type);
+ if (id >= AMDGPU_UCODE_ID_MAXIMUM)
+ return -EINVAL;
+
+ ucode = &adev->firmware.ucode[id];
+ if (ucode->fw == NULL)
+ return -EINVAL;
+
+ gpu_addr = ucode->mc_addr;
+ header = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ data_size = le32_to_cpu(header->header.ucode_size_bytes);
+
+ if ((type == CGS_UCODE_ID_CP_MEC_JT1) ||
+ (type == CGS_UCODE_ID_CP_MEC_JT2)) {
+ gpu_addr += ALIGN(le32_to_cpu(header->header.ucode_size_bytes), PAGE_SIZE);
+ data_size = le32_to_cpu(header->jt_size) << 2;
+ }
+
+ info->kptr = ucode->kaddr;
+ info->image_size = data_size;
+ info->mc_addr = gpu_addr;
+ info->version = (uint16_t)le32_to_cpu(header->header.ucode_version);
+
+ if (type == CGS_UCODE_ID_CP_MEC)
+ info->image_size = le32_to_cpu(header->jt_offset) << 2;
+
+ info->fw_version = amdgpu_get_firmware_version(cgs_device, type);
+ info->feature_version = (uint16_t)le32_to_cpu(header->ucode_feature_version);
+ } else {
+ char fw_name[30] = {0};
+ int err = 0;
+ uint32_t ucode_size;
+ uint32_t ucode_start_address;
+ const uint8_t *src;
+ const struct smc_firmware_header_v1_0 *hdr;
+ const struct common_firmware_header *header;
+ struct amdgpu_firmware_info *ucode = NULL;
+
+ if (!adev->pm.fw) {
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ if ((adev->pdev->revision == 0x80) ||
+ (adev->pdev->revision == 0x81) ||
+ (adev->pdev->device == 0x665f)) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/bonaire_k_smc.bin");
+ } else {
+ strscpy(fw_name, "amdgpu/bonaire_smc.bin");
+ }
+ break;
+ case CHIP_HAWAII:
+ if (adev->pdev->revision == 0x80) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/hawaii_k_smc.bin");
+ } else {
+ strscpy(fw_name, "amdgpu/hawaii_smc.bin");
+ }
+ break;
+ case CHIP_TOPAZ:
+ if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) ||
+ ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) ||
+ ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)) ||
+ ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
+ ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/topaz_k_smc.bin");
+ } else
+ strscpy(fw_name, "amdgpu/topaz_smc.bin");
+ break;
+ case CHIP_TONGA:
+ if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
+ ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/tonga_k_smc.bin");
+ } else
+ strscpy(fw_name, "amdgpu/tonga_smc.bin");
+ break;
+ case CHIP_FIJI:
+ strscpy(fw_name, "amdgpu/fiji_smc.bin");
+ break;
+ case CHIP_POLARIS11:
+ if (type == CGS_UCODE_ID_SMU) {
+ if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/polaris11_k_smc.bin");
+ } else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
+ } else {
+ strscpy(fw_name, "amdgpu/polaris11_smc.bin");
+ }
+ } else if (type == CGS_UCODE_ID_SMU_SK) {
+ strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
+ }
+ break;
+ case CHIP_POLARIS10:
+ if (type == CGS_UCODE_ID_SMU) {
+ if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/polaris10_k_smc.bin");
+ } else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
+ } else {
+ strscpy(fw_name, "amdgpu/polaris10_smc.bin");
+ }
+ } else if (type == CGS_UCODE_ID_SMU_SK) {
+ strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
+ }
+ break;
+ case CHIP_POLARIS12:
+ if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
+ info->is_kicker = true;
+ strscpy(fw_name, "amdgpu/polaris12_k_smc.bin");
+ } else {
+ strscpy(fw_name, "amdgpu/polaris12_smc.bin");
+ }
+ break;
+ case CHIP_VEGAM:
+ strscpy(fw_name, "amdgpu/vegam_smc.bin");
+ break;
+ case CHIP_VEGA10:
+ if ((adev->pdev->device == 0x687f) &&
+ ((adev->pdev->revision == 0xc0) ||
+ (adev->pdev->revision == 0xc1) ||
+ (adev->pdev->revision == 0xc3)))
+ strscpy(fw_name, "amdgpu/vega10_acg_smc.bin");
+ else
+ strscpy(fw_name, "amdgpu/vega10_smc.bin");
+ break;
+ case CHIP_VEGA12:
+ strscpy(fw_name, "amdgpu/vega12_smc.bin");
+ break;
+ case CHIP_VEGA20:
+ strscpy(fw_name, "amdgpu/vega20_smc.bin");
+ break;
+ default:
+ DRM_ERROR("SMC firmware not supported\n");
+ return -EINVAL;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->pm.fw,
+ AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
+ if (err) {
+ DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
+ amdgpu_ucode_release(&adev->pm.fw);
+ return err;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
+ ucode->ucode_id = AMDGPU_UCODE_ID_SMC;
+ ucode->fw = adev->pm.fw;
+ header = (const struct common_firmware_header *)ucode->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+ hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
+ amdgpu_ucode_print_smc_hdr(&hdr->header);
+ adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
+ ucode_start_address = le32_to_cpu(hdr->ucode_start_addr);
+ src = (const uint8_t *)(adev->pm.fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ info->version = adev->pm.fw_version;
+ info->image_size = ucode_size;
+ info->ucode_start_address = ucode_start_address;
+ info->kptr = (void *)src;
+ }
+ return 0;
+}
+
+static const struct cgs_ops amdgpu_cgs_ops = {
+ .read_register = amdgpu_cgs_read_register,
+ .write_register = amdgpu_cgs_write_register,
+ .read_ind_register = amdgpu_cgs_read_ind_register,
+ .write_ind_register = amdgpu_cgs_write_ind_register,
+ .get_firmware_info = amdgpu_cgs_get_firmware_info,
+};
+
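+/**
+ * amdgpu_cgs_create_device - allocate a CGS wrapper around an amdgpu device
+ * @adev: amdgpu device to wrap
+ *
+ * Return: pointer to the new CGS device, or NULL on allocation failure.
+ * The caller releases it with amdgpu_cgs_destroy_device().
+ */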
+struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
+{
+ struct amdgpu_cgs_device *cgs_device =
+ kmalloc(sizeof(*cgs_device), GFP_KERNEL);
+
+ if (!cgs_device) {
+ DRM_ERROR("Couldn't allocate CGS device structure\n");
+ return NULL;
+ }
+
+ cgs_device->base.ops = &amdgpu_cgs_ops;
+ cgs_device->adev = adev;
+
+ return (struct cgs_device *)cgs_device;
+}
+
+void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device)
+{
+ kfree(cgs_device);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
new file mode 100644
index 000000000000..9f96d568acf2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -0,0 +1,2053 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/display/drm_dp_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "atombios_encoders.h"
+#include "atombios_dp.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_i2c.h"
+#include "amdgpu_display.h"
+
+#include <linux/pm_runtime.h>
+
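+/**
+ * amdgpu_connector_hotplug - handle a hotplug event on a connector
+ * @connector: DRM connector that received the event
+ *
+ * Updates the HPD polarity and, for enabled DP connectors whose sink is
+ * still DP, retrains the link by cycling DPMS when the sink reports that
+ * link training is needed.
+ */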
+void amdgpu_connector_hotplug(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* bail if the connector does not have hpd pin, e.g.,
+ * VGA, TV, etc.
+ */
+ if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE)
+ return;
+
+ amdgpu_display_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+
+ /* if the connector is already off, don't turn it back on */
+ if (connector->dpms != DRM_MODE_DPMS_ON)
+ return;
+
+ /* just deal with DP (not eDP) here. */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ /* if existing sink type was not DP no need to retrain */
+ if (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT)
+ return;
+
+ /* first get sink type as it may be reset after (un)plug */
+ dig_connector->dp_sink_type = amdgpu_atombios_dp_get_sinktype(amdgpu_connector);
+ /* don't do anything if sink is not display port, i.e.,
+ * passive dp->(dvi|hdmi) adaptor
+ */
+ if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) &&
+ amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+ /* Don't start link training before we have the DPCD */
+ if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ return;
+
+ /* Turn the connector off and back on immediately, which
+ * will trigger link training
+ */
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+ }
+ }
+}
+
+static void amdgpu_connector_property_change_mode(struct drm_encoder *encoder)
+{
+ struct drm_crtc *crtc = encoder->crtc;
+
+ if (crtc && crtc->enabled) {
+ drm_crtc_helper_set_mode(crtc, &crtc->mode,
+ crtc->x, crtc->y, crtc->primary->fb);
+ }
+}
+
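+/**
+ * amdgpu_connector_get_monitor_bpc - determine the bpc to use for a connector
+ * @connector: DRM connector to evaluate
+ *
+ * Starts from the bpc reported by the display (or the panel tables for
+ * LVDS/eDP) and clamps it for HDMI deep color limits, the sink's maximum
+ * TMDS clock and the deep_color module parameter.
+ *
+ * Return: bits per color channel to use for the connector.
+ */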
+int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector;
+ int bpc = 8;
+ unsigned int mode_clock, max_tmds_clock;
+
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ if (amdgpu_connector->use_digital) {
+ if (connector->display_info.is_hdmi) {
+ if (connector->display_info.bpc)
+ bpc = connector->display_info.bpc;
+ }
+ }
+ break;
+ case DRM_MODE_CONNECTOR_DVID:
+ case DRM_MODE_CONNECTOR_HDMIA:
+ if (connector->display_info.is_hdmi) {
+ if (connector->display_info.bpc)
+ bpc = connector->display_info.bpc;
+ }
+ break;
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ dig_connector = amdgpu_connector->con_priv;
+ if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) ||
+ connector->display_info.is_hdmi) {
+ if (connector->display_info.bpc)
+ bpc = connector->display_info.bpc;
+ }
+ break;
+ case DRM_MODE_CONNECTOR_eDP:
+ case DRM_MODE_CONNECTOR_LVDS:
+ if (connector->display_info.bpc)
+ bpc = connector->display_info.bpc;
+ else {
+ const struct drm_connector_helper_funcs *connector_funcs =
+ connector->helper_private;
+ struct drm_encoder *encoder = connector_funcs->best_encoder(connector);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (dig->lcd_misc & ATOM_PANEL_MISC_V13_6BIT_PER_COLOR)
+ bpc = 6;
+ else if (dig->lcd_misc & ATOM_PANEL_MISC_V13_8BIT_PER_COLOR)
+ bpc = 8;
+ }
+ break;
+ }
+
+ if (connector->display_info.is_hdmi) {
+ /*
+ * Pre DCE-8 hw can't handle > 12 bpc, and more than 12 bpc doesn't make
+ * much sense without support for > 12 bpc framebuffers. RGB 4:4:4 at
+ * 12 bpc is always supported on hdmi deep color sinks, as this is
+ * required by the HDMI-1.3 spec. Clamp to a safe 12 bpc maximum.
+ */
+ if (bpc > 12) {
+ DRM_DEBUG("%s: HDMI deep color %d bpc unsupported. Using 12 bpc.\n",
+ connector->name, bpc);
+ bpc = 12;
+ }
+
+ /* Any defined maximum tmds clock limit we must not exceed? */
+ if (connector->display_info.max_tmds_clock > 0) {
+ /* mode_clock is clock in kHz for mode to be modeset on this connector */
+ mode_clock = amdgpu_connector->pixelclock_for_modeset;
+
+ /* Maximum allowable input clock in kHz */
+ max_tmds_clock = connector->display_info.max_tmds_clock;
+
+ DRM_DEBUG("%s: hdmi mode dotclock %d kHz, max tmds input clock %d kHz.\n",
+ connector->name, mode_clock, max_tmds_clock);
+
+ /* Check if bpc is within clock limit. Try to degrade gracefully otherwise */
+ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) {
+ if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) &&
+ (mode_clock * 5/4 <= max_tmds_clock))
+ bpc = 10;
+ else
+ bpc = 8;
+
+ DRM_DEBUG("%s: HDMI deep color 12 bpc exceeds max tmds clock. Using %d bpc.\n",
+ connector->name, bpc);
+ }
+
+ if ((bpc == 10) && (mode_clock * 5/4 > max_tmds_clock)) {
+ bpc = 8;
+ DRM_DEBUG("%s: HDMI deep color 10 bpc exceeds max tmds clock. Using %d bpc.\n",
+ connector->name, bpc);
+ }
+ } else if (bpc > 8) {
+ /* max_tmds_clock missing, but hdmi spec mandates it for deep color. */
+ DRM_DEBUG("%s: Required max tmds clock for HDMI deep color missing. Using 8 bpc.\n",
+ connector->name);
+ bpc = 8;
+ }
+ }
+
+ if ((amdgpu_deep_color == 0) && (bpc > 8)) {
+ DRM_DEBUG("%s: Deep color disabled. Set amdgpu module param deep_color=1 to enable.\n",
+ connector->name);
+ bpc = 8;
+ }
+
+ DRM_DEBUG("%s: Display bpc=%d, returned bpc=%d\n",
+ connector->name, connector->display_info.bpc, bpc);
+
+ return bpc;
+}
+
+static void
+amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
+ enum drm_connector_status status)
+{
+ struct drm_encoder *best_encoder;
+ struct drm_encoder *encoder;
+ const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+ bool connected;
+
+ best_encoder = connector_funcs->best_encoder(connector);
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ if ((encoder == best_encoder) && (status == connector_status_connected))
+ connected = true;
+ else
+ connected = false;
+
+ amdgpu_atombios_encoder_set_bios_scratch_regs(connector, encoder, connected);
+ }
+}
+
+static struct drm_encoder *
+amdgpu_connector_find_encoder(struct drm_connector *connector,
+ int encoder_type)
+{
+ struct drm_encoder *encoder;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ if (encoder->encoder_type == encoder_type)
+ return encoder;
+ }
+
+ return NULL;
+}
+
+static struct edid *
+amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev)
+{
+ return drm_edid_duplicate(drm_edid_raw(adev->mode_info.bios_hardcoded_edid));
+}
+
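+/*
+ * Fetch and cache the EDID for a connector, preferring the DP AUX channel
+ * when a DP bridge or DP/eDP sink is present and falling back to the
+ * regular DDC bus. For LVDS/eDP panels without an EDID, the hardcoded
+ * EDID from the video BIOS is used if available.
+ */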
+static void amdgpu_connector_get_edid(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->edid)
+ return;
+
+ /* on hw with routers, select right port */
+ if (amdgpu_connector->router.ddc_valid)
+ amdgpu_i2c_router_select_ddc_port(amdgpu_connector);
+
+ if ((amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
+ ENCODER_OBJECT_ID_NONE) &&
+ amdgpu_connector->ddc_bus->has_aux) {
+ amdgpu_connector->edid = drm_get_edid(connector,
+ &amdgpu_connector->ddc_bus->aux.ddc);
+ } else if ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
+ struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
+
+ if ((dig->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT ||
+ dig->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) &&
+ amdgpu_connector->ddc_bus->has_aux)
+ amdgpu_connector->edid = drm_get_edid(connector,
+ &amdgpu_connector->ddc_bus->aux.ddc);
+ else if (amdgpu_connector->ddc_bus)
+ amdgpu_connector->edid = drm_get_edid(connector,
+ &amdgpu_connector->ddc_bus->adapter);
+ } else if (amdgpu_connector->ddc_bus) {
+ amdgpu_connector->edid = drm_get_edid(connector,
+ &amdgpu_connector->ddc_bus->adapter);
+ }
+
+ if (!amdgpu_connector->edid) {
+ /* some laptops provide a hardcoded edid in rom for LCDs */
+ if (((connector->connector_type == DRM_MODE_CONNECTOR_LVDS) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_eDP))) {
+ amdgpu_connector->edid = amdgpu_connector_get_hardcoded_edid(adev);
+ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
+ }
+ }
+}
+
+static void amdgpu_connector_free_edid(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ kfree(amdgpu_connector->edid);
+ amdgpu_connector->edid = NULL;
+}
+
+static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ int ret;
+
+ if (amdgpu_connector->edid) {
+ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
+ ret = drm_add_edid_modes(connector, amdgpu_connector->edid);
+ return ret;
+ }
+ drm_connector_update_edid_property(connector, NULL);
+ return 0;
+}
+
+static struct drm_encoder *
+amdgpu_connector_best_single_encoder(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder;
+
+ /* pick the first one */
+ drm_connector_for_each_possible_encoder(connector, encoder)
+ return encoder;
+
+ return NULL;
+}
+
+static void amdgpu_get_native_mode(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ if (encoder == NULL)
+ return;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (!list_empty(&connector->probed_modes)) {
+ struct drm_display_mode *preferred_mode =
+ list_first_entry(&connector->probed_modes,
+ struct drm_display_mode, head);
+
+ amdgpu_encoder->native_mode = *preferred_mode;
+ } else {
+ amdgpu_encoder->native_mode.clock = 0;
+ }
+}
+
+static struct drm_display_mode *
+amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *mode = NULL;
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ if (native_mode->hdisplay != 0 &&
+ native_mode->vdisplay != 0 &&
+ native_mode->clock != 0) {
+ mode = drm_mode_duplicate(dev, native_mode);
+ if (!mode)
+ return NULL;
+
+ mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+ drm_mode_set_name(mode);
+
+ DRM_DEBUG_KMS("Adding native panel mode %s\n", mode->name);
+ } else if (native_mode->hdisplay != 0 &&
+ native_mode->vdisplay != 0) {
+ /* mac laptops without an edid */
+ /* Note that this is not necessarily the exact panel mode,
+ * but an approximation based on the cvt formula. For these
+ * systems we should ideally read the mode info out of the
+ * registers or add a mode table, but this works and is much
+ * simpler.
+ */
+ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false);
+ if (!mode)
+ return NULL;
+
+ mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+ DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name);
+ }
+ return mode;
+}
+
+static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *mode = NULL;
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+ int i;
+ int n;
+ struct mode_size {
+ char name[DRM_DISPLAY_MODE_LEN];
+ int w;
+ int h;
+ } common_modes[] = {
+ { "640x480", 640, 480},
+ { "800x600", 800, 600},
+ { "1024x768", 1024, 768},
+ { "1280x720", 1280, 720},
+ { "1280x800", 1280, 800},
+ {"1280x1024", 1280, 1024},
+ { "1440x900", 1440, 900},
+ {"1680x1050", 1680, 1050},
+ {"1600x1200", 1600, 1200},
+ {"1920x1080", 1920, 1080},
+ {"1920x1200", 1920, 1200}
+ };
+
+ n = ARRAY_SIZE(common_modes);
+
+ for (i = 0; i < n; i++) {
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT)) {
+ if (common_modes[i].w > 1024 ||
+ common_modes[i].h > 768)
+ continue;
+ }
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ if (common_modes[i].w > native_mode->hdisplay ||
+ common_modes[i].h > native_mode->vdisplay ||
+ (common_modes[i].w == native_mode->hdisplay &&
+ common_modes[i].h == native_mode->vdisplay))
+ continue;
+ }
+
+ mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ return;
+ strscpy(mode->name, common_modes[i].name, DRM_DISPLAY_MODE_LEN);
+
+ drm_mode_probed_add(connector, mode);
+ }
+}
+
+static int amdgpu_connector_set_property(struct drm_connector *connector,
+ struct drm_property *property,
+ uint64_t val)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ if (property == adev->mode_info.coherent_mode_property) {
+ struct amdgpu_encoder_atom_dig *dig;
+ bool new_coherent_mode;
+
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (!amdgpu_encoder->enc_priv)
+ return 0;
+
+ dig = amdgpu_encoder->enc_priv;
+ new_coherent_mode = val ? true : false;
+ if (dig->coherent_mode != new_coherent_mode) {
+ dig->coherent_mode = new_coherent_mode;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.audio_property) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_connector->audio != val) {
+ amdgpu_connector->audio = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.dither_property) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_connector->dither != val) {
+ amdgpu_connector->dither = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.underscan_property) {
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_encoder->underscan_type != val) {
+ amdgpu_encoder->underscan_type = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.underscan_hborder_property) {
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_encoder->underscan_hborder != val) {
+ amdgpu_encoder->underscan_hborder = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.underscan_vborder_property) {
+ /* need to find digital encoder on connector */
+ encoder = amdgpu_connector_find_encoder(connector, DRM_MODE_ENCODER_TMDS);
+ if (!encoder)
+ return 0;
+
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ if (amdgpu_encoder->underscan_vborder != val) {
+ amdgpu_encoder->underscan_vborder = val;
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+ }
+
+ if (property == adev->mode_info.load_detect_property) {
+ struct amdgpu_connector *amdgpu_connector =
+ to_amdgpu_connector(connector);
+
+ if (val == 0)
+ amdgpu_connector->dac_load_detect = false;
+ else
+ amdgpu_connector->dac_load_detect = true;
+ }
+
+ if (property == dev->mode_config.scaling_mode_property) {
+ enum amdgpu_rmx_type rmx_type;
+
+ if (connector->encoder) {
+ amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
+ } else {
+ const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
+ amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
+ }
+
+ switch (val) {
+ default:
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
+ }
+
+ if (amdgpu_encoder->rmx_type == rmx_type)
+ return 0;
+
+ if ((rmx_type != DRM_MODE_SCALE_NONE) &&
+ (amdgpu_encoder->native_mode.clock == 0))
+ return 0;
+
+ amdgpu_encoder->rmx_type = rmx_type;
+
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ }
+
+ return 0;
+}
+
+static void
+amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+ struct drm_display_mode *t, *mode;
+
+ /* If the EDID preferred mode doesn't match the native mode, use it */
+ list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
+ if (mode->type & DRM_MODE_TYPE_PREFERRED) {
+ if (mode->hdisplay != native_mode->hdisplay ||
+ mode->vdisplay != native_mode->vdisplay)
+ drm_mode_copy(native_mode, mode);
+ }
+ }
+
+ /* Try to get native mode details from EDID if necessary */
+ if (!native_mode->clock) {
+ list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
+ if (mode->hdisplay == native_mode->hdisplay &&
+ mode->vdisplay == native_mode->vdisplay) {
+ drm_mode_copy(native_mode, mode);
+ drm_mode_set_crtcinfo(native_mode, CRTC_INTERLACE_HALVE_V);
+ DRM_DEBUG_KMS("Determined LVDS native mode details from EDID\n");
+ break;
+ }
+ }
+ }
+
+ if (!native_mode->clock) {
+ DRM_DEBUG_KMS("No LVDS native mode details, disabling RMX\n");
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ }
+}
+
+static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder;
+ int ret = 0;
+ struct drm_display_mode *mode;
+
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+ if (ret > 0) {
+ encoder = amdgpu_connector_best_single_encoder(connector);
+ if (encoder) {
+ amdgpu_connector_fixup_lcd_native_mode(encoder, connector);
+ /* add scaled modes */
+ amdgpu_connector_add_common_modes(encoder, connector);
+ }
+ return ret;
+ }
+
+ encoder = amdgpu_connector_best_single_encoder(connector);
+ if (!encoder)
+ return 0;
+
+ /* we have no EDID modes */
+ mode = amdgpu_connector_lcd_native_mode(encoder);
+ if (mode) {
+ ret = 1;
+ drm_mode_probed_add(connector, mode);
+ /* add the width/height from vbios tables if available */
+ connector->display_info.width_mm = mode->width_mm;
+ connector->display_info.height_mm = mode->height_mm;
+ /* add scaled modes */
+ amdgpu_connector_add_common_modes(encoder, connector);
+ }
+
+ return ret;
+}
+
+static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+
+ if ((mode->hdisplay < 320) || (mode->vdisplay < 240))
+ return MODE_PANEL;
+
+ if (encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ /* AVIVO hardware supports downscaling modes larger than the panel
+ * to the panel size, but I'm not sure this is desirable.
+ */
+ if ((mode->hdisplay > native_mode->hdisplay) ||
+ (mode->vdisplay > native_mode->vdisplay))
+ return MODE_PANEL;
+
+ /* if scaling is disabled, block non-native modes */
+ if (amdgpu_encoder->rmx_type == RMX_OFF) {
+ if ((mode->hdisplay != native_mode->hdisplay) ||
+ (mode->vdisplay != native_mode->vdisplay))
+ return MODE_PANEL;
+ }
+ }
+
+ return MODE_OK;
+}
+
+static enum drm_connector_status
+amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+ enum drm_connector_status ret = connector_status_disconnected;
+ int r;
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ return connector_status_disconnected;
+ }
+ }
+
+ if (encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ /* check if panel is valid */
+ if (native_mode->hdisplay >= 320 && native_mode->vdisplay >= 240)
+ ret = connector_status_connected;
+ }
+
+ /* check for edid as well */
+ amdgpu_connector_get_edid(connector);
+ if (amdgpu_connector->edid)
+ ret = connector_status_connected;
+ /* check acpi lid status ??? */
+
+ amdgpu_connector_update_scratch_regs(connector, ret);
+
+ if (!drm_kms_helper_is_poll_worker())
+ pm_runtime_put_autosuspend(connector->dev->dev);
+
+ return ret;
+}
+
+static void amdgpu_connector_unregister(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->ddc_bus && amdgpu_connector->ddc_bus->has_aux) {
+ drm_dp_aux_unregister(&amdgpu_connector->ddc_bus->aux);
+ amdgpu_connector->ddc_bus->has_aux = false;
+ }
+}
+
+static void amdgpu_connector_destroy(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ amdgpu_connector_free_edid(connector);
+ kfree(amdgpu_connector->con_priv);
+ drm_connector_unregister(connector);
+ drm_connector_cleanup(connector);
+ kfree(connector);
+}
+
+static int amdgpu_connector_set_lcd_property(struct drm_connector *connector,
+ struct drm_property *property,
+ uint64_t value)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_encoder *amdgpu_encoder;
+ enum amdgpu_rmx_type rmx_type;
+
+ DRM_DEBUG_KMS("\n");
+ if (property != dev->mode_config.scaling_mode_property)
+ return 0;
+
+ if (connector->encoder)
+ amdgpu_encoder = to_amdgpu_encoder(connector->encoder);
+ else {
+ const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
+
+ amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector));
+ }
+
+ switch (value) {
+ case DRM_MODE_SCALE_NONE:
+ rmx_type = RMX_OFF;
+ break;
+ case DRM_MODE_SCALE_CENTER:
+ rmx_type = RMX_CENTER;
+ break;
+ case DRM_MODE_SCALE_ASPECT:
+ rmx_type = RMX_ASPECT;
+ break;
+ default:
+ case DRM_MODE_SCALE_FULLSCREEN:
+ rmx_type = RMX_FULL;
+ break;
+ }
+
+ if (amdgpu_encoder->rmx_type == rmx_type)
+ return 0;
+
+ amdgpu_encoder->rmx_type = rmx_type;
+
+ amdgpu_connector_property_change_mode(&amdgpu_encoder->base);
+ return 0;
+}
+
+
+static const struct drm_connector_helper_funcs amdgpu_connector_lvds_helper_funcs = {
+ .get_modes = amdgpu_connector_lvds_get_modes,
+ .mode_valid = amdgpu_connector_lvds_mode_valid,
+ .best_encoder = amdgpu_connector_best_single_encoder,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_lvds_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_lvds_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .set_property = amdgpu_connector_set_lcd_property,
+};
+
+static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
+{
+ int ret;
+
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+ amdgpu_get_native_mode(connector);
+
+ return ret;
+}
+
+static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* XXX check mode bandwidth */
+
+ if ((mode->clock / 10) > adev->clock.max_pixel_clock)
+ return MODE_CLOCK_HIGH;
+
+ return MODE_OK;
+}
+
+static enum drm_connector_status
+amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_encoder *encoder;
+ const struct drm_encoder_helper_funcs *encoder_funcs;
+ bool dret = false;
+ enum drm_connector_status ret = connector_status_disconnected;
+ int r;
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ return connector_status_disconnected;
+ }
+ }
+
+ encoder = amdgpu_connector_best_single_encoder(connector);
+ if (!encoder)
+ ret = connector_status_disconnected;
+
+ if (amdgpu_connector->ddc_bus)
+ dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
+ if (dret) {
+ amdgpu_connector->detected_by_load = false;
+ amdgpu_connector_free_edid(connector);
+ amdgpu_connector_get_edid(connector);
+
+ if (!amdgpu_connector->edid) {
+ DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
+ connector->name);
+ ret = connector_status_connected;
+ } else {
+ amdgpu_connector->use_digital =
+ !!(amdgpu_connector->edid->input & DRM_EDID_INPUT_DIGITAL);
+
+ /* some oems have boards with separate digital and analog connectors
+ * with a shared ddc line (often vga + hdmi)
+ */
+ if (amdgpu_connector->use_digital && amdgpu_connector->shared_ddc) {
+ amdgpu_connector_free_edid(connector);
+ ret = connector_status_disconnected;
+ } else {
+ ret = connector_status_connected;
+ }
+ }
+ } else {
+
+ /* if we aren't forcing don't do destructive polling */
+ if (!force) {
+ /* only return the previous status if we last
+ * detected a monitor via load.
+ */
+ if (amdgpu_connector->detected_by_load)
+ ret = connector->status;
+ goto out;
+ }
+
+ if (amdgpu_connector->dac_load_detect && encoder) {
+ encoder_funcs = encoder->helper_private;
+ ret = encoder_funcs->detect(encoder, connector);
+ if (ret != connector_status_disconnected)
+ amdgpu_connector->detected_by_load = true;
+ }
+ }
+
+ amdgpu_connector_update_scratch_regs(connector, ret);
+
+out:
+ if (!drm_kms_helper_is_poll_worker())
+ pm_runtime_put_autosuspend(connector->dev->dev);
+
+ return ret;
+}
+
+static const struct drm_connector_helper_funcs amdgpu_connector_vga_helper_funcs = {
+ .get_modes = amdgpu_connector_vga_get_modes,
+ .mode_valid = amdgpu_connector_vga_mode_valid,
+ .best_encoder = amdgpu_connector_best_single_encoder,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_vga_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_vga_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .set_property = amdgpu_connector_set_property,
+};
+
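+/*
+ * Check whether the current HPD sense state still matches the connector's
+ * recorded status; used by the detect callbacks to return the cached
+ * status when nothing has changed.
+ */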
+static bool
+amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ enum drm_connector_status status;
+
+ if (amdgpu_connector->hpd.hpd != AMDGPU_HPD_NONE) {
+ if (amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd))
+ status = connector_status_connected;
+ else
+ status = connector_status_disconnected;
+ if (connector->status == status)
+ return true;
+ }
+
+ return false;
+}
+
+static void amdgpu_connector_shared_ddc(enum drm_connector_status *status,
+ struct drm_connector *connector,
+ struct amdgpu_connector *amdgpu_connector)
+{
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *list_amdgpu_connector;
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (amdgpu_connector->shared_ddc && *status == connector_status_connected) {
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(list_connector,
+ &iter) {
+ if (connector == list_connector)
+ continue;
+ list_amdgpu_connector = to_amdgpu_connector(list_connector);
+ if (list_amdgpu_connector->shared_ddc &&
+ list_amdgpu_connector->ddc_bus->rec.i2c_id ==
+ amdgpu_connector->ddc_bus->rec.i2c_id) {
+ /* cases where both connectors are digital */
+ if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+ /* hpd is our only option in this case */
+ if (!amdgpu_display_hpd_sense(adev,
+ amdgpu_connector->hpd.hpd)) {
+ amdgpu_connector_free_edid(connector);
+ *status = connector_status_disconnected;
+ }
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ }
+}
+
+/*
+ * DVI is complicated:
+ * Do a DDC probe; if the DDC probe passes, get the full EDID so
+ * we can do analog/digital monitor detection at this point.
+ * If the monitor is an analog monitor, or we got no DDC,
+ * we need to find the DAC encoder object for this connector.
+ * If we got no DDC, we do load detection on the DAC encoder object.
+ * If we got analog DDC, or load detection passes on the DAC encoder,
+ * we have to check if this analog encoder is shared with anyone else (TV);
+ * if it's shared, we have to set the other connector to disconnected.
+ */
+static enum drm_connector_status
+amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ const struct drm_encoder_helper_funcs *encoder_funcs;
+ int r;
+ enum drm_connector_status ret = connector_status_disconnected;
+ bool dret = false, broken_edid = false;
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ return connector_status_disconnected;
+ }
+ }
+
+ if (amdgpu_connector->detected_hpd_without_ddc) {
+ force = true;
+ amdgpu_connector->detected_hpd_without_ddc = false;
+ }
+
+ if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
+ ret = connector->status;
+ goto exit;
+ }
+
+ if (amdgpu_connector->ddc_bus) {
+ dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
+
+ /* Sometimes the pins required for the DDC probe on DVI
+ * connectors don't make contact at the same time that the ones
+ * for HPD do. If the DDC probe fails even though we had an HPD
+ * signal, try again later
+ */
+ if (!dret && !force &&
+ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+ DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n");
+ amdgpu_connector->detected_hpd_without_ddc = true;
+ schedule_delayed_work(&adev->hotplug_work,
+ msecs_to_jiffies(1000));
+ goto exit;
+ }
+ }
+ if (dret) {
+ amdgpu_connector->detected_by_load = false;
+ amdgpu_connector_free_edid(connector);
+ amdgpu_connector_get_edid(connector);
+
+ if (!amdgpu_connector->edid) {
+ DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
+ connector->name);
+ ret = connector_status_connected;
+ broken_edid = true; /* defer use_digital to later */
+ } else {
+ amdgpu_connector->use_digital =
+ !!(amdgpu_connector->edid->input & DRM_EDID_INPUT_DIGITAL);
+
+ /* some oems have boards with separate digital and analog connectors
+ * with a shared ddc line (often vga + hdmi)
+ */
+ if ((!amdgpu_connector->use_digital) && amdgpu_connector->shared_ddc) {
+ amdgpu_connector_free_edid(connector);
+ ret = connector_status_disconnected;
+ } else {
+ ret = connector_status_connected;
+ }
+
+ /* This gets complicated. We have boards with VGA + HDMI with a
+ * shared DDC line and we have boards with DVI-D + HDMI with a shared
+ * DDC line. The latter is more complex because with DVI<->HDMI adapters
+ * you don't really know what's connected to which port as both are digital.
+ */
+ amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
+ }
+ }
+
+ if ((ret == connector_status_connected) && (amdgpu_connector->use_digital == true))
+ goto out;
+
+ /* DVI-D and HDMI-A are digital only */
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_DVID) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA))
+ goto out;
+
+ /* if we aren't forcing don't do destructive polling */
+ if (!force) {
+ /* only return the previous status if we last
+ * detected a monitor via load.
+ */
+ if (amdgpu_connector->detected_by_load)
+ ret = connector->status;
+ goto out;
+ }
+
+ /* find analog encoder */
+ if (amdgpu_connector->dac_load_detect) {
+ struct drm_encoder *encoder;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ if (encoder->encoder_type != DRM_MODE_ENCODER_DAC &&
+ encoder->encoder_type != DRM_MODE_ENCODER_TVDAC)
+ continue;
+
+ encoder_funcs = encoder->helper_private;
+ if (encoder_funcs->detect) {
+ if (!broken_edid) {
+ if (ret != connector_status_connected) {
+ /* deal with analog monitors without DDC */
+ ret = encoder_funcs->detect(encoder, connector);
+ if (ret == connector_status_connected) {
+ amdgpu_connector->use_digital = false;
+ }
+ if (ret != connector_status_disconnected)
+ amdgpu_connector->detected_by_load = true;
+ }
+ } else {
+ enum drm_connector_status lret;
+ /* assume digital unless load detection says otherwise */
+ amdgpu_connector->use_digital = true;
+ lret = encoder_funcs->detect(encoder, connector);
+ DRM_DEBUG_KMS("load_detect %x returned: %x\n",
+ encoder->encoder_type, lret);
+ if (lret == connector_status_connected)
+ amdgpu_connector->use_digital = false;
+ }
+ break;
+ }
+ }
+ }
+
+out:
+ /* updated in get modes as well since we need to know if it's analog or digital */
+ amdgpu_connector_update_scratch_regs(connector, ret);
+
+exit:
+ if (!drm_kms_helper_is_poll_worker())
+ pm_runtime_put_autosuspend(connector->dev->dev);
+
+ return ret;
+}
+
+/* okay need to be smart in here about which encoder to pick */
+static struct drm_encoder *
+amdgpu_connector_dvi_encoder(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_encoder *encoder;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ if (amdgpu_connector->use_digital == true) {
+ if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)
+ return encoder;
+ } else {
+ if (encoder->encoder_type == DRM_MODE_ENCODER_DAC ||
+ encoder->encoder_type == DRM_MODE_ENCODER_TVDAC)
+ return encoder;
+ }
+ }
+
+ /* see if we have a default encoder TODO */
+
+ /* then check use digital */
+ /* pick the first one */
+ drm_connector_for_each_possible_encoder(connector, encoder)
+ return encoder;
+
+ return NULL;
+}
+
+static void amdgpu_connector_dvi_force(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (connector->force == DRM_FORCE_ON)
+ amdgpu_connector->use_digital = false;
+ if (connector->force == DRM_FORCE_ON_DIGITAL)
+ amdgpu_connector->use_digital = true;
+}
+
+/**
+ * amdgpu_max_hdmi_pixel_clock - Return max supported HDMI (TMDS) pixel clock
+ * @adev: pointer to amdgpu_device
+ *
+ * Return: maximum supported HDMI (TMDS) pixel clock in KHz.
+ */
+static int amdgpu_max_hdmi_pixel_clock(const struct amdgpu_device *adev)
+{
+ if (adev->asic_type >= CHIP_POLARIS10)
+ return 600000;
+ else if (adev->asic_type >= CHIP_TONGA)
+ return 300000;
+ else
+ return 297000;
+}
+
+/**
+ * amdgpu_connector_dvi_mode_valid - Validate a mode on DVI/HDMI connectors
+ * @connector: DRM connector to validate the mode on
+ * @mode: display mode to validate
+ *
+ * Validate the given display mode on DVI and HDMI connectors, including
+ * analog signals on DVI-I.
+ *
+ * Return: drm_mode_status indicating whether the mode is valid.
+ */
+static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ const int max_hdmi_pixel_clock = amdgpu_max_hdmi_pixel_clock(adev);
+ const int max_dvi_single_link_pixel_clock = 165000;
+ int max_digital_pixel_clock_khz;
+
+ /* XXX check mode bandwidth */
+
+ if (amdgpu_connector->use_digital) {
+ switch (amdgpu_connector->connector_object_id) {
+ case CONNECTOR_OBJECT_ID_HDMI_TYPE_A:
+ max_digital_pixel_clock_khz = max_hdmi_pixel_clock;
+ break;
+ case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I:
+ case CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D:
+ max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock;
+ break;
+ case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I:
+ case CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D:
+ case CONNECTOR_OBJECT_ID_HDMI_TYPE_B:
+ max_digital_pixel_clock_khz = max_dvi_single_link_pixel_clock * 2;
+ break;
+ }
+
+ /* When the display EDID claims that it's an HDMI display,
+ * we use the HDMI encoder mode of the display HW,
+ * so we should verify against the max HDMI clock here.
+ */
+ if (connector->display_info.is_hdmi)
+ max_digital_pixel_clock_khz = max_hdmi_pixel_clock;
+
+ if (mode->clock > max_digital_pixel_clock_khz)
+ return MODE_CLOCK_HIGH;
+ }
+
+ /* check against the max pixel clock */
+ if ((mode->clock / 10) > adev->clock.max_pixel_clock)
+ return MODE_CLOCK_HIGH;
+
+ return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs amdgpu_connector_dvi_helper_funcs = {
+ .get_modes = amdgpu_connector_vga_get_modes,
+ .mode_valid = amdgpu_connector_dvi_mode_valid,
+ .best_encoder = amdgpu_connector_dvi_encoder,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_dvi_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_dvi_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .set_property = amdgpu_connector_set_property,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .force = amdgpu_connector_dvi_force,
+};
+
+static int amdgpu_connector_dp_get_modes(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+ int ret;
+
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
+ struct drm_display_mode *mode;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (!amdgpu_dig_connector->edp_on)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+ if (!amdgpu_dig_connector->edp_on)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_OFF);
+ } else {
+ /* need to setup ddc on the bridge */
+ if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
+ ENCODER_OBJECT_ID_NONE) {
+ if (encoder)
+ amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
+ }
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+ }
+
+ if (ret > 0) {
+ if (encoder) {
+ amdgpu_connector_fixup_lcd_native_mode(encoder, connector);
+ /* add scaled modes */
+ amdgpu_connector_add_common_modes(encoder, connector);
+ }
+ return ret;
+ }
+
+ if (!encoder)
+ return 0;
+
+ /* we have no EDID modes */
+ mode = amdgpu_connector_lcd_native_mode(encoder);
+ if (mode) {
+ ret = 1;
+ drm_mode_probed_add(connector, mode);
+ /* add the width/height from vbios tables if available */
+ connector->display_info.width_mm = mode->width_mm;
+ connector->display_info.height_mm = mode->height_mm;
+ /* add scaled modes */
+ amdgpu_connector_add_common_modes(encoder, connector);
+ }
+ } else {
+ /* need to setup ddc on the bridge */
+ if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
+ ENCODER_OBJECT_ID_NONE) {
+ if (encoder)
+ amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
+ }
+ amdgpu_connector_get_edid(connector);
+ ret = amdgpu_connector_ddc_get_modes(connector);
+
+ amdgpu_get_native_mode(connector);
+ }
+
+ return ret;
+}
+
+u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ return amdgpu_encoder->encoder_id;
+ default:
+ break;
+ }
+ }
+
+ return ENCODER_OBJECT_ID_NONE;
+}
+
+static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
+{
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+ bool found = false;
+
+ drm_connector_for_each_possible_encoder(connector, encoder) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)
+ found = true;
+ }
+
+ return found;
+}
+
+bool amdgpu_connector_is_dp12_capable(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if ((adev->clock.default_dispclk >= 53900) &&
+ amdgpu_connector_encoder_is_hbr2(connector)) {
+ return true;
+ }
+
+ return false;
+}
+
+static enum drm_connector_status
+amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ enum drm_connector_status ret = connector_status_disconnected;
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+ int r;
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ return connector_status_disconnected;
+ }
+ }
+
+ if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
+ ret = connector->status;
+ goto out;
+ }
+
+ amdgpu_connector_free_edid(connector);
+
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
+ if (encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ /* check if panel is valid */
+ if (native_mode->hdisplay >= 320 && native_mode->vdisplay >= 240)
+ ret = connector_status_connected;
+ }
+ /* eDP is always DP */
+ amdgpu_dig_connector->dp_sink_type = CONNECTOR_OBJECT_ID_DISPLAYPORT;
+ if (!amdgpu_dig_connector->edp_on)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ ret = connector_status_connected;
+ if (!amdgpu_dig_connector->edp_on)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_OFF);
+ } else if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) !=
+ ENCODER_OBJECT_ID_NONE) {
+ /* DP bridges are always DP */
+ amdgpu_dig_connector->dp_sink_type = CONNECTOR_OBJECT_ID_DISPLAYPORT;
+ /* get the DPCD from the bridge */
+ amdgpu_atombios_dp_get_dpcd(amdgpu_connector);
+
+ if (encoder) {
+ /* setup ddc on the bridge */
+ amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
+ /* bridge chips are always aux */
+ /* try DDC */
+ if (amdgpu_display_ddc_probe(amdgpu_connector, true))
+ ret = connector_status_connected;
+ else if (amdgpu_connector->dac_load_detect) { /* try load detection */
+ const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+
+ ret = encoder_funcs->detect(encoder, connector);
+ }
+ }
+ } else {
+ amdgpu_dig_connector->dp_sink_type =
+ amdgpu_atombios_dp_get_sinktype(amdgpu_connector);
+ if (amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
+ ret = connector_status_connected;
+ if (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT)
+ amdgpu_atombios_dp_get_dpcd(amdgpu_connector);
+ } else {
+ if (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
+ if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ ret = connector_status_connected;
+ } else {
+ /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */
+ if (amdgpu_display_ddc_probe(amdgpu_connector,
+ false))
+ ret = connector_status_connected;
+ }
+ }
+ }
+
+ amdgpu_connector_update_scratch_regs(connector, ret);
+out:
+ if (!drm_kms_helper_is_poll_worker())
+ pm_runtime_put_autosuspend(connector->dev->dev);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+ connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ drm_dp_set_subconnector_property(&amdgpu_connector->base,
+ ret,
+ amdgpu_dig_connector->dpcd,
+ amdgpu_dig_connector->downstream_ports);
+ return ret;
+}
+
+static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
+
+ /* XXX check mode bandwidth */
+
+ if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) ||
+ (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) {
+ struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
+
+ if ((mode->hdisplay < 320) || (mode->vdisplay < 240))
+ return MODE_PANEL;
+
+ if (encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+
+ /* AVIVO hardware can downscale modes larger than the panel
+ * to the panel size, but it is not clear this is desirable.
+ */
+ if ((mode->hdisplay > native_mode->hdisplay) ||
+ (mode->vdisplay > native_mode->vdisplay))
+ return MODE_PANEL;
+
+ /* if scaling is disabled, block non-native modes */
+ if (amdgpu_encoder->rmx_type == RMX_OFF) {
+ if ((mode->hdisplay != native_mode->hdisplay) ||
+ (mode->vdisplay != native_mode->vdisplay))
+ return MODE_PANEL;
+ }
+ }
+ return MODE_OK;
+ } else {
+ if ((amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (amdgpu_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
+ return amdgpu_atombios_dp_mode_valid_helper(connector, mode);
+ } else {
+ if (connector->display_info.is_hdmi) {
+ /* HDMI 1.3+ supports a max clock of 340 MHz */
+ if (mode->clock > 340000)
+ return MODE_CLOCK_HIGH;
+ } else {
+ if (mode->clock > 165000)
+ return MODE_CLOCK_HIGH;
+ }
+ }
+ }
+
+ return MODE_OK;
+}
+
+static int
+amdgpu_connector_late_register(struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ int r = 0;
+
+ if (amdgpu_connector->ddc_bus->has_aux) {
+ amdgpu_connector->ddc_bus->aux.dev = amdgpu_connector->base.kdev;
+ r = drm_dp_aux_register(&amdgpu_connector->ddc_bus->aux);
+ }
+
+ return r;
+}
+
+static const struct drm_connector_helper_funcs amdgpu_connector_dp_helper_funcs = {
+ .get_modes = amdgpu_connector_dp_get_modes,
+ .mode_valid = amdgpu_connector_dp_mode_valid,
+ .best_encoder = amdgpu_connector_dvi_encoder,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_dp_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_dp_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .set_property = amdgpu_connector_set_property,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .force = amdgpu_connector_dvi_force,
+ .late_register = amdgpu_connector_late_register,
+};
+
+static const struct drm_connector_funcs amdgpu_connector_edp_funcs = {
+ .dpms = drm_helper_connector_dpms,
+ .detect = amdgpu_connector_dp_detect,
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .set_property = amdgpu_connector_set_lcd_property,
+ .early_unregister = amdgpu_connector_unregister,
+ .destroy = amdgpu_connector_destroy,
+ .force = amdgpu_connector_dvi_force,
+ .late_register = amdgpu_connector_late_register,
+};
+
+void
+amdgpu_connector_add(struct amdgpu_device *adev,
+ uint32_t connector_id,
+ uint32_t supported_device,
+ int connector_type,
+ struct amdgpu_i2c_bus_rec *i2c_bus,
+ uint16_t connector_object_id,
+ struct amdgpu_hpd *hpd,
+ struct amdgpu_router *router)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector;
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector;
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+ struct i2c_adapter *ddc = NULL;
+ uint32_t subpixel_order = SubPixelNone;
+ bool shared_ddc = false;
+ bool is_dp_bridge = false;
+ bool has_aux = false;
+
+ if (connector_type == DRM_MODE_CONNECTOR_Unknown)
+ return;
+
+ /* see if we already added it */
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ if (amdgpu_connector->connector_id == connector_id) {
+ amdgpu_connector->devices |= supported_device;
+ drm_connector_list_iter_end(&iter);
+ return;
+ }
+ if (amdgpu_connector->ddc_bus && i2c_bus->valid) {
+ if (amdgpu_connector->ddc_bus->rec.i2c_id == i2c_bus->i2c_id) {
+ amdgpu_connector->shared_ddc = true;
+ shared_ddc = true;
+ }
+ if (amdgpu_connector->router_bus && router->ddc_valid &&
+ (amdgpu_connector->router.router_id == router->router_id)) {
+ amdgpu_connector->shared_ddc = false;
+ shared_ddc = false;
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ /* check if it's a dp bridge */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & supported_device) {
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ is_dp_bridge = true;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ amdgpu_connector = kzalloc(sizeof(struct amdgpu_connector), GFP_KERNEL);
+ if (!amdgpu_connector)
+ return;
+
+ connector = &amdgpu_connector->base;
+
+ amdgpu_connector->connector_id = connector_id;
+ amdgpu_connector->devices = supported_device;
+ amdgpu_connector->shared_ddc = shared_ddc;
+ amdgpu_connector->connector_object_id = connector_object_id;
+ amdgpu_connector->hpd = *hpd;
+
+ amdgpu_connector->router = *router;
+ if (router->ddc_valid || router->cd_valid) {
+ amdgpu_connector->router_bus = amdgpu_i2c_lookup(adev, &router->i2c_info);
+ if (!amdgpu_connector->router_bus)
+ DRM_ERROR("Failed to assign router i2c bus! Check dmesg for i2c errors.\n");
+ }
+
+ if (is_dp_bridge) {
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (amdgpu_connector->ddc_bus) {
+ has_aux = true;
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
+ DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
+ }
+ switch (connector_type) {
+ case DRM_MODE_CONNECTOR_VGA:
+ case DRM_MODE_CONNECTOR_DVIA:
+ default:
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base,
+ &amdgpu_connector_dp_helper_funcs);
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ break;
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_DVID:
+ case DRM_MODE_CONNECTOR_HDMIA:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base,
+ &amdgpu_connector_dp_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_property,
+ UNDERSCAN_OFF);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_hborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_vborder_property,
+ 0);
+
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+
+ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = true;
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ connector->doublescan_allowed = true;
+ else
+ connector->doublescan_allowed = false;
+ if (connector_type == DRM_MODE_CONNECTOR_DVII) {
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ }
+ break;
+ case DRM_MODE_CONNECTOR_LVDS:
+ case DRM_MODE_CONNECTOR_eDP:
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_edp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base,
+ &amdgpu_connector_dp_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_FULLSCREEN);
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ break;
+ }
+ } else {
+ switch (connector_type) {
+ case DRM_MODE_CONNECTOR_VGA:
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_vga_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ /* no HPD on analog connectors */
+ amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ break;
+ case DRM_MODE_CONNECTOR_DVIA:
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_vga_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ /* no HPD on analog connectors */
+ amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
+ connector->interlace_allowed = true;
+ connector->doublescan_allowed = true;
+ break;
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_DVID:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dvi_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);
+ subpixel_order = SubPixelHorizontalRGB;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.coherent_mode_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_property,
+ UNDERSCAN_OFF);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_hborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_vborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+
+ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+ if (connector_type == DRM_MODE_CONNECTOR_DVII) {
+ amdgpu_connector->dac_load_detect = true;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.load_detect_property,
+ 1);
+ }
+ connector->interlace_allowed = true;
+ if (connector_type == DRM_MODE_CONNECTOR_DVII)
+ connector->doublescan_allowed = true;
+ else
+ connector->doublescan_allowed = false;
+ break;
+ case DRM_MODE_CONNECTOR_HDMIA:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dvi_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.coherent_mode_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_property,
+ UNDERSCAN_OFF);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_hborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_vborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = true;
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ connector->doublescan_allowed = true;
+ else
+ connector->doublescan_allowed = false;
+ break;
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (amdgpu_connector->ddc_bus) {
+ has_aux = true;
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
+ DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_dp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);
+ subpixel_order = SubPixelHorizontalRGB;
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.coherent_mode_property,
+ 1);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_property,
+ UNDERSCAN_OFF);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_hborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.underscan_vborder_property,
+ 0);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_NONE);
+ if (amdgpu_audio != 0) {
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.audio_property,
+ AMDGPU_AUDIO_AUTO);
+ amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
+ }
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ adev->mode_info.dither_property,
+ AMDGPU_FMT_DITHER_DISABLE);
+ connector->interlace_allowed = true;
+ /* in theory with a DP to VGA converter... */
+ connector->doublescan_allowed = false;
+ break;
+ case DRM_MODE_CONNECTOR_eDP:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (amdgpu_connector->ddc_bus) {
+ has_aux = true;
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ } else {
+ DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ }
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_edp_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_FULLSCREEN);
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ break;
+ case DRM_MODE_CONNECTOR_LVDS:
+ amdgpu_dig_connector = kzalloc(sizeof(struct amdgpu_connector_atom_dig), GFP_KERNEL);
+ if (!amdgpu_dig_connector)
+ goto failed;
+ amdgpu_connector->con_priv = amdgpu_dig_connector;
+ if (i2c_bus->valid) {
+ amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);
+ if (!amdgpu_connector->ddc_bus)
+ DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+ else
+ ddc = &amdgpu_connector->ddc_bus->adapter;
+ }
+ drm_connector_init_with_ddc(dev, &amdgpu_connector->base,
+ &amdgpu_connector_lvds_funcs,
+ connector_type,
+ ddc);
+ drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs);
+ drm_object_attach_property(&amdgpu_connector->base.base,
+ dev->mode_config.scaling_mode_property,
+ DRM_MODE_SCALE_FULLSCREEN);
+ subpixel_order = SubPixelHorizontalRGB;
+ connector->interlace_allowed = false;
+ connector->doublescan_allowed = false;
+ break;
+ }
+ }
+
+ if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
+ if (i2c_bus->valid) {
+ connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+ DRM_CONNECTOR_POLL_DISCONNECT;
+ }
+ } else
+ connector->polled = DRM_CONNECTOR_POLL_HPD;
+
+ connector->display_info.subpixel_order = subpixel_order;
+
+ if (has_aux)
+ amdgpu_atombios_dp_aux_init(amdgpu_connector);
+
+ if (connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
+ connector_type == DRM_MODE_CONNECTOR_eDP) {
+ drm_connector_attach_dp_subconnector_property(&amdgpu_connector->base);
+ }
+
+ return;
+
+failed:
+ drm_connector_cleanup(connector);
+ kfree(connector);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
new file mode 100644
index 000000000000..eff833b6ed31
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_CONNECTORS_H__
+#define __AMDGPU_CONNECTORS_H__
+
+void amdgpu_connector_hotplug(struct drm_connector *connector);
+int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector);
+u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *connector);
+bool amdgpu_connector_is_dp12_capable(struct drm_connector *connector);
+void
+amdgpu_connector_add(struct amdgpu_device *adev,
+ uint32_t connector_id,
+ uint32_t supported_device,
+ int connector_type,
+ struct amdgpu_i2c_bus_rec *i2c_bus,
+ uint16_t connector_object_id,
+ struct amdgpu_hpd *hpd,
+ struct amdgpu_router *router);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
new file mode 100644
index 000000000000..425a3e564360
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+
+static const guid_t MCE = CPER_NOTIFY_MCE;
+static const guid_t CMC = CPER_NOTIFY_CMC;
+static const guid_t BOOT = BOOT_TYPE;
+
+static const guid_t CRASHDUMP = AMD_CRASHDUMP;
+static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR;
+
+static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size)
+{
+ hdr->record_length += size;
+}
+
+static void amdgpu_cper_get_timestamp(struct cper_timestamp *timestamp)
+{
+ struct tm tm;
+ time64_t now = ktime_get_real_seconds();
+
+ time64_to_tm(now, 0, &tm);
+ timestamp->seconds = tm.tm_sec;
+ timestamp->minutes = tm.tm_min;
+ timestamp->hours = tm.tm_hour;
+ timestamp->flag = 0;
+ timestamp->day = tm.tm_mday;
+ timestamp->month = 1 + tm.tm_mon;
+ timestamp->year = (1900 + tm.tm_year) % 100;
+ timestamp->century = (1900 + tm.tm_year) / 100;
+}
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev)
+{
+ char record_id[16];
+
+ hdr->signature[0] = 'C';
+ hdr->signature[1] = 'P';
+ hdr->signature[2] = 'E';
+ hdr->signature[3] = 'R';
+ hdr->revision = CPER_HDR_REV_1;
+ hdr->signature_end = 0xFFFFFFFF;
+ hdr->error_severity = sev;
+
+ hdr->valid_bits.platform_id = 1;
+ hdr->valid_bits.timestamp = 1;
+
+ amdgpu_cper_get_timestamp(&hdr->timestamp);
+
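+ /* record_id is "<socket id>:<unique id>"; only its first 8 bytes are
+ * copied into the header below
+ */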
+ snprintf(record_id, 9, "%d:%X",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0,
+ atomic_inc_return(&adev->cper.unique_id));
+ memcpy(hdr->record_id, record_id, 8);
+
+ snprintf(hdr->platform_id, 16, "0x%04X:0x%04X",
+ adev->pdev->vendor, adev->pdev->device);
+ /* pmfw version should be part of creator_id according to CPER spec */
+ snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID_AMDGPU);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_BOOT:
+ hdr->notify_type = BOOT;
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ hdr->notify_type = MCE;
+ break;
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ if (sev == CPER_SEV_NON_FATAL_CORRECTED)
+ hdr->notify_type = CMC;
+ else
+ hdr->notify_type = MCE;
+ break;
+ default:
+ dev_err(adev->dev, "Unknown CPER Type\n");
+ break;
+ }
+
+ __inc_entry_length(hdr, HDR_LEN);
+}
+
+static int amdgpu_cper_entry_fill_section_desc(struct amdgpu_device *adev,
+ struct cper_sec_desc *section_desc,
+ bool bp_threshold,
+ bool poison,
+ enum cper_error_severity sev,
+ guid_t sec_type,
+ uint32_t section_length,
+ uint32_t section_offset)
+{
+ section_desc->revision_minor = CPER_SEC_MINOR_REV_1;
+ section_desc->revision_major = CPER_SEC_MAJOR_REV_22;
+ section_desc->sec_offset = section_offset;
+ section_desc->sec_length = section_length;
+ section_desc->valid_bits.fru_text = 1;
+ section_desc->flag_bits.primary = 1;
+ section_desc->severity = sev;
+ section_desc->sec_type = sec_type;
+
+ snprintf(section_desc->fru_text, 20, "OAM%d",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0);
+
+ if (bp_threshold)
+ section_desc->flag_bits.exceed_err_threshold = 1;
+ if (poison)
+ section_desc->flag_bits.latent_err = 1;
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_crashdump_fatal *section;
+
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_crashdump_fatal *)((uint8_t *)hdr +
+ FATAL_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, false,
+ CPER_SEV_FATAL, CRASHDUMP, FATAL_SEC_LEN,
+ FATAL_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->body.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->body.reg_arr_size = sizeof(reg_data);
+ section->body.data = reg_data;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + FATAL_SEC_LEN);
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+ bool poison;
+
+ poison = sev != CPER_SEV_NON_FATAL_CORRECTED;
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, poison,
+ sev, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ reg_count = umin(reg_count, CPER_ACA_REG_COUNT);
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ memcpy(section->ctx.reg_dump, reg_dump, reg_count * sizeof(uint32_t));
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+ uint32_t socket_id;
+
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
+ CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.valid_bits.ms_chk = 1;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->info.ms_chk_bits.err_type = 1;
+ section->info.ms_chk_bits.pcc = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ /* Hardcoded register dump for the bad page threshold CPER */
+ socket_id = (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_HI] = 0xB0000000;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff;
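+ /* socket_id is folded into the IPID registers: bit 0 of IPID_LO is
+ * socket_id / 4, bits 13:12 of IPID_HI are socket_id % 4
+ */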
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = (socket_id / 4) & 0x01;
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x096 | (((socket_id % 4) & 0x3) << 12);
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count)
+{
+ struct cper_hdr *hdr;
+ uint32_t size = 0;
+
+ size += HDR_LEN;
+ size += (SEC_DESC_LEN * section_count);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ size += (NONSTD_SEC_LEN * section_count);
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ size += (FATAL_SEC_LEN * section_count);
+ break;
+ case AMDGPU_CPER_TYPE_BOOT:
+ size += (BOOT_SEC_LEN * section_count);
+ break;
+ default:
+ dev_err(adev->dev, "Unknown CPER Type!\n");
+ return NULL;
+ }
+
+ hdr = kzalloc(size, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ /* Save this early */
+ hdr->sec_cnt = section_count;
+
+ return hdr;
+}
+
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank)
+{
+ struct cper_hdr *fatal = NULL;
+ struct cper_sec_crashdump_reg_data reg_data = { 0 };
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ int ret;
+
+ fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
+ if (!fatal) {
+ dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
+ return -ENOMEM;
+ }
+
+ reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+ reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+ amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
+ ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
+ if (ret)
+ return ret;
+
+ amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
+ kfree(fatal);
+
+ return 0;
+}
+
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
+{
+ struct cper_hdr *bp_threshold = NULL;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ int ret;
+
+ bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
+ if (!bp_threshold) {
+ dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
+ return -ENOMEM;
+ }
+
+ amdgpu_cper_entry_fill_hdr(adev, bp_threshold,
+ AMDGPU_CPER_TYPE_BP_THRESHOLD,
+ CPER_SEV_FATAL);
+ ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
+ if (ret)
+ return ret;
+
+ amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
+ kfree(bp_threshold);
+
+ return 0;
+}
+
+static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
+ enum aca_error_type aca_err_type)
+{
+ switch (aca_err_type) {
+ case ACA_ERROR_TYPE_UE:
+ return CPER_SEV_FATAL;
+ case ACA_ERROR_TYPE_CE:
+ return CPER_SEV_NON_FATAL_CORRECTED;
+ case ACA_ERROR_TYPE_DEFERRED:
+ return CPER_SEV_NON_FATAL_UNCORRECTED;
+ default:
+ dev_err(adev->dev, "Unknown ACA error type!\n");
+ return CPER_SEV_FATAL;
+ }
+}
+
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count)
+{
+ struct cper_hdr *corrected = NULL;
+ enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ uint32_t i = 0;
+ int ret;
+
+ corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
+ if (!corrected) {
+ dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
+ return -ENOMEM;
+ }
+
+ /* Raise severity if any DE is detected in the ACA bank list */
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+ break;
+ }
+ }
+
+ amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
+
+ /* Combine CE and DE in cper record */
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+ reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+ reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+ reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+ reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+ reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+ reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+ reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+ ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
+ amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
+ reg_data, CPER_ACA_REG_COUNT);
+ if (ret)
+ return ret;
+ }
+
+ amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
+ kfree(corrected);
+
+ return 0;
+}
+
+static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos)
+{
+ struct cper_hdr *chdr;
+
+ chdr = (struct cper_hdr *)&(ring->ring[pos]);
+ return !strcmp(chdr->signature, "CPER");
+}
+
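+/* Return how many bytes can be consumed contiguously at @pos: the record
+ * length if a CPER header starts there, otherwise the distance to the next
+ * header (or the remaining space before the ring wraps if none is found),
+ * in both cases capped at the space left before the ring wraps.
+ */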
+static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
+{
+ struct cper_hdr *chdr;
+ u64 p;
+ u32 chunk, rec_len = 0;
+
+ chdr = (struct cper_hdr *)&(ring->ring[pos]);
+ chunk = ring->ring_size - (pos << 2);
+
+ if (!strcmp(chdr->signature, "CPER")) {
+ rec_len = chdr->record_length;
+ goto calc;
+ }
+
+ /* ring buffer is not full, no cper data after ring->wptr */
+ if (ring->count_dw)
+ goto calc;
+
+ for (p = pos + 1; p <= ring->buf_mask; p++) {
+ chdr = (struct cper_hdr *)&(ring->ring[p]);
+ if (!strcmp(chdr->signature, "CPER")) {
+ rec_len = (p - pos) << 2;
+ goto calc;
+ }
+ }
+
+calc:
+ if (!rec_len)
+ return chunk;
+ else
+ return umin(rec_len, chunk);
+}
+
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
+{
+ u64 pos, wptr_old, rptr;
+ int rec_cnt_dw = count >> 2;
+ u32 chunk, ent_sz;
+ u8 *s = (u8 *)src;
+
+ if (count >= ring->ring_size - 4) {
+ dev_err(ring->adev->dev,
+ "CPER data size(%d) is larger than ring size(%d)\n",
+ count, ring->ring_size - 4);
+
+ return;
+ }
+
+ mutex_lock(&ring->adev->cper.ring_lock);
+
+ wptr_old = ring->wptr;
+ rptr = *ring->rptr_cpu_addr & ring->ptr_mask;
+
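+ /* copy in pieces sized so that no memcpy crosses the end of the ring */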
+ while (count) {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr);
+ chunk = umin(ent_sz, count);
+
+ memcpy(&ring->ring[ring->wptr], s, chunk);
+
+ ring->wptr += (chunk >> 2);
+ ring->wptr &= ring->ptr_mask;
+ count -= chunk;
+ s += chunk;
+ }
+
+ if (ring->count_dw < rec_cnt_dw)
+ ring->count_dw = 0;
+
+ /* the buffer has overflowed, adjust rptr */
+ if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
+ ((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
+ ((rptr <= ring->wptr) && (ring->wptr < wptr_old))) {
+ pos = (ring->wptr + 1) & ring->ptr_mask;
+
+ do {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, pos);
+
+ rptr += (ent_sz >> 2);
+ rptr &= ring->ptr_mask;
+ *ring->rptr_cpu_addr = rptr;
+
+ pos = rptr;
+ } while (!amdgpu_cper_is_hdr(ring, rptr));
+ }
+
+ if (ring->count_dw >= rec_cnt_dw)
+ ring->count_dw -= rec_cnt_dw;
+ mutex_unlock(&ring->adev->cper.ring_lock);
+}
+
+static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *(ring->rptr_cpu_addr);
+}
+
+static u64 amdgpu_cper_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ return ring->wptr;
+}
+
+static const struct amdgpu_ring_funcs cper_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_CPER,
+ .align_mask = 0xff,
+ .support_64bit_ptrs = false,
+ .get_rptr = amdgpu_cper_ring_get_rptr,
+ .get_wptr = amdgpu_cper_ring_get_wptr,
+};
+
+static int amdgpu_cper_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &(adev->cper.ring_buf);
+
+ mutex_init(&adev->cper.ring_lock);
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = false;
+ ring->no_scheduler = true;
+ ring->funcs = &cper_ring_funcs;
+
+ sprintf(ring->name, "cper");
+ return amdgpu_ring_init(adev, ring, CPER_MAX_RING_SIZE, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+int amdgpu_cper_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
+ return 0;
+
+ r = amdgpu_cper_ring_init(adev);
+ if (r) {
+ dev_err(adev->dev, "failed to initialize cper ring, r = %d\n", r);
+ return r;
+ }
+
+ mutex_init(&adev->cper.cper_lock);
+
+ adev->cper.enabled = true;
+ adev->cper.max_count = CPER_MAX_ALLOWED_COUNT;
+
+ return 0;
+}
+
+int amdgpu_cper_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
+ return 0;
+
+ adev->cper.enabled = false;
+
+ amdgpu_ring_fini(&(adev->cper.ring_buf));
+ adev->cper.count = 0;
+ adev->cper.wptr = 0;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
new file mode 100644
index 000000000000..353421807387
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_CPER_H__
+#define __AMDGPU_CPER_H__
+
+#include "amd_cper.h"
+#include "amdgpu_aca.h"
+
+#define CPER_MAX_ALLOWED_COUNT 0x1000
+#define CPER_MAX_RING_SIZE 0X100000
+#define HDR_LEN (sizeof(struct cper_hdr))
+#define SEC_DESC_LEN (sizeof(struct cper_sec_desc))
+
+#define BOOT_SEC_LEN (sizeof(struct cper_sec_crashdump_boot))
+#define FATAL_SEC_LEN (sizeof(struct cper_sec_crashdump_fatal))
+#define NONSTD_SEC_LEN (sizeof(struct cper_sec_nonstd_err))
+
+#define SEC_DESC_OFFSET(idx) (HDR_LEN + (SEC_DESC_LEN * idx))
+
+#define BOOT_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (BOOT_SEC_LEN * idx))
+#define FATAL_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (FATAL_SEC_LEN * idx))
+#define NONSTD_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (NONSTD_SEC_LEN * idx))
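+
+/* A CPER entry is laid out as the header, then 'count' section descriptors,
+ * then 'count' section bodies; the macros above give the byte offset of the
+ * descriptor or section body at index 'idx' within that layout.
+ */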
+
+enum amdgpu_cper_type {
+ AMDGPU_CPER_TYPE_RUNTIME,
+ AMDGPU_CPER_TYPE_FATAL,
+ AMDGPU_CPER_TYPE_BOOT,
+ AMDGPU_CPER_TYPE_BP_THRESHOLD,
+};
+
+struct amdgpu_cper {
+ bool enabled;
+
+ atomic_t unique_id;
+ struct mutex cper_lock;
+
+ /* Lifetime CPERs generated */
+ uint32_t count;
+ uint32_t max_count;
+
+ uint32_t wptr;
+
+ void *ring[CPER_MAX_ALLOWED_COUNT];
+ struct amdgpu_ring ring_buf;
+ struct mutex ring_lock;
+};
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev);
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data);
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count);
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t section_idx);
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count);
+/* UEs must be encoded into separate CPER entries, one UE per CPER */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank);
+/* CEs and DEs are combined into 1 cper entry */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count);
+/* The bad page threshold is encoded into a separate CPER entry */
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev);
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
+ void *src, int count);
+int amdgpu_cper_init(struct amdgpu_device *adev);
+int amdgpu_cper_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
new file mode 100644
index 000000000000..ecdfe6cb36cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -0,0 +1,1816 @@
+/*
+ * Copyright 2008 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ */
+
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/sync_file.h>
+#include <linux/dma-buf.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_syncobj.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "amdgpu_cs.h"
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
+ struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+ if (cs->in.num_chunks == 0)
+ return -EINVAL;
+
+ memset(p, 0, sizeof(*p));
+ p->adev = adev;
+ p->filp = filp;
+
+ p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+ if (!p->ctx)
+ return -EINVAL;
+
+ if (atomic_read(&p->ctx->guilty)) {
+ amdgpu_ctx_put(p->ctx);
+ return -ECANCELED;
+ }
+
+ amdgpu_sync_create(&p->sync);
+ drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ return 0;
+}
+
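+/* Map an IB chunk onto a gang member: return the index of the job whose
+ * scheduler entity matches, growing the gang by one entry if needed and
+ * there is still room.
+ */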
+static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib)
+{
+ struct drm_sched_entity *entity;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance,
+ chunk_ib->ring, &entity);
+ if (r)
+ return r;
+
+ /*
+ * Abort if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP.
+ */
+ if (entity->rq == NULL)
+ return -EINVAL;
+
+ /* Check if we can add this IB to some existing job */
+ for (i = 0; i < p->gang_size; ++i)
+ if (p->entities[i] == entity)
+ return i;
+
+ /* If not, increase the gang size if possible */
+ if (i == AMDGPU_CS_GANG_SIZE)
+ return -EINVAL;
+
+ p->entities[i] = entity;
+ p->gang_size = i + 1;
+ return i;
+}
+
+static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib,
+ unsigned int *num_ibs)
+{
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
+ return -EINVAL;
+
+ ++(num_ibs[r]);
+ p->gang_leader_idx = r;
+ return 0;
+}
+
+static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_fence *data,
+ uint32_t *offset)
+{
+ struct drm_gem_object *gobj;
+ unsigned long size;
+
+ gobj = drm_gem_object_lookup(p->filp, data->handle);
+ if (gobj == NULL)
+ return -EINVAL;
+
+ p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ drm_gem_object_put(gobj);
+
+ size = amdgpu_bo_size(p->uf_bo);
+ if (size != PAGE_SIZE || data->offset > (size - 8))
+ return -EINVAL;
+
+ if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
+ return -EINVAL;
+
+ *offset = data->offset;
+ return 0;
+}
+
+static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
+{
+ struct drm_amdgpu_bo_list_entry *info;
+ int r;
+
+ r = amdgpu_bo_create_list_entry_array(data, &info);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+ &p->bo_list);
+ if (r)
+ goto error_free;
+
+ kvfree(info);
+ return 0;
+
+error_free:
+ kvfree(info);
+
+ return r;
+}
+
+/* Copy the data from userspace and go over it the first time */
+static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
+ struct amdgpu_vm *vm = &fpriv->vm;
+ uint64_t *chunk_array;
+ uint32_t uf_offset = 0;
+ size_t size;
+ int ret;
+ int i;
+
+ chunk_array = memdup_array_user(u64_to_user_ptr(cs->in.chunks),
+ cs->in.num_chunks,
+ sizeof(uint64_t));
+ if (IS_ERR(chunk_array))
+ return PTR_ERR(chunk_array);
+
+ p->nchunks = cs->in.num_chunks;
+ p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
+ GFP_KERNEL);
+ if (!p->chunks) {
+ ret = -ENOMEM;
+ goto free_chunk;
+ }
+
+ for (i = 0; i < p->nchunks; i++) {
+ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
+ struct drm_amdgpu_cs_chunk user_chunk;
+
+ chunk_ptr = u64_to_user_ptr(chunk_array[i]);
+ if (copy_from_user(&user_chunk, chunk_ptr,
+ sizeof(struct drm_amdgpu_cs_chunk))) {
+ ret = -EFAULT;
+ i--;
+ goto free_partial_kdata;
+ }
+ p->chunks[i].chunk_id = user_chunk.chunk_id;
+ p->chunks[i].length_dw = user_chunk.length_dw;
+
+ size = p->chunks[i].length_dw;
+
+ p->chunks[i].kdata = vmemdup_array_user(u64_to_user_ptr(user_chunk.chunk_data),
+ size,
+ sizeof(uint32_t));
+ if (IS_ERR(p->chunks[i].kdata)) {
+ ret = PTR_ERR(p->chunks[i].kdata);
+ i--;
+ goto free_partial_kdata;
+ }
+ size *= sizeof(uint32_t);
+
+ /* Assume the worst on the following checks */
+ ret = -EINVAL;
+ switch (p->chunks[i].chunk_id) {
+ case AMDGPU_CHUNK_ID_IB:
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
+ if (ret)
+ goto free_partial_kdata;
+ break;
+
+ case AMDGPU_CHUNK_ID_FENCE:
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
+ &uf_offset);
+ if (ret)
+ goto free_partial_kdata;
+ break;
+
+ case AMDGPU_CHUNK_ID_BO_HANDLES:
+ if (size < sizeof(struct drm_amdgpu_bo_list_in))
+ goto free_partial_kdata;
+
+ /* Only a single BO list is allowed to simplify handling. */
+ if (p->bo_list)
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
+ break;
+
+ case AMDGPU_CHUNK_ID_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ break;
+
+ default:
+ goto free_partial_kdata;
+ }
+ }
+
+ if (!p->gang_size || (amdgpu_sriov_vf(p->adev) && p->gang_size > 1)) {
+ ret = -EINVAL;
+ goto free_all_kdata;
+ }
+
+ for (i = 0; i < p->gang_size; ++i) {
+ ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
+ num_ibs[i], &p->jobs[i],
+ p->filp->client_id);
+ if (ret)
+ goto free_all_kdata;
+ switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
+ case AMDGPU_ENFORCE_ISOLATION_DISABLE:
+ default:
+ p->jobs[i]->enforce_isolation = false;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = true;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ }
+ }
+ p->gang_leader = p->jobs[p->gang_leader_idx];
+
+ if (p->ctx->generation != p->gang_leader->generation) {
+ ret = -ECANCELED;
+ goto free_all_kdata;
+ }
+
+ if (p->uf_bo)
+ p->gang_leader->uf_addr = uf_offset;
+ kvfree(chunk_array);
+
+ /* Use this opportunity to fill in task info for the vm */
+ amdgpu_vm_set_task_info(vm);
+
+ return 0;
+
+free_all_kdata:
+ i = p->nchunks - 1;
+free_partial_kdata:
+ for (; i >= 0; i--)
+ kvfree(p->chunks[i].kdata);
+ kvfree(p->chunks);
+ p->chunks = NULL;
+ p->nchunks = 0;
+free_chunk:
+ kvfree(chunk_array);
+
+ return ret;
+}
+
+static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk,
+ unsigned int *ce_preempt,
+ unsigned int *de_preempt)
+{
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_ring *ring;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ job = p->jobs[r];
+ ring = amdgpu_job_ring(job);
+ ib = &job->ibs[job->num_ibs++];
+
+ /* submissions to kernel queues are disabled */
+ if (ring->no_user_submission)
+ return -EINVAL;
+
+ /* MM engine doesn't support user fences */
+ if (p->uf_bo && ring->funcs->no_user_fence)
+ return -EINVAL;
+
+ if (!p->adev->debug_enable_ce_cs &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
+ dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
+ return -EINVAL;
+ }
+
+ if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
+ (*ce_preempt)++;
+ else
+ (*de_preempt)++;
+
+ /* Each GFX command submission allows at most one preemptible
+ * IB each for CE and DE */
+ if (*ce_preempt > 1 || *de_preempt > 1)
+ return -EINVAL;
+ }
+
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+
+ r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
+ chunk_ib->ib_bytes : 0,
+ AMDGPU_IB_POOL_DELAYED, ib);
+ if (r) {
+ drm_err(adev_to_drm(p->adev), "Failed to get ib!\n");
+ return r;
+ }
+
+ ib->gpu_addr = chunk_ib->va_start;
+ ib->length_dw = chunk_ib->ib_bytes / 4;
+ ib->flags = chunk_ib->flags;
+ return 0;
+}
+
+static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_deps;
+ int i, r;
+
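+ /* length_dw counts 32-bit dwords, so multiply by 4 to get the chunk
+ * size in bytes before dividing by the per-entry size.
+ */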
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_dep);
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_ctx *ctx;
+ struct drm_sched_entity *entity;
+ struct dma_fence *fence;
+
+ ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+ deps[i].ip_instance,
+ deps[i].ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return r;
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+ amdgpu_ctx_put(ctx);
+
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ else if (!fence)
+ continue;
+
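+ /* For scheduled dependencies only wait until the dependent job has
+ * been scheduled, not until it has finished.
+ */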
+ if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+ struct drm_sched_fence *s_fence;
+ struct dma_fence *old = fence;
+
+ s_fence = to_drm_sched_fence(fence);
+ fence = dma_fence_get(&s_fence->scheduled);
+ dma_fence_put(old);
+ }
+
+ r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
+ uint32_t handle, u64 point,
+ u64 flags)
+{
+ struct dma_fence *fence;
+ int r;
+
+ r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+ if (r) {
+ drm_err(adev_to_drm(p->adev), "syncobj %u failed to find fence @ %llu (%d)!\n",
+ handle, point, r);
+ return r;
+ }
+
+ r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
+ dma_fence_put(fence);
+ return r;
+}
+
+static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
+ unsigned int num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
+ unsigned int num_deps;
+ int i, r;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
+ syncobj_deps[i].point,
+ syncobj_deps[i].flags);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
+ unsigned int num_deps;
+ int i;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+ for (i = 0; i < num_deps; ++i) {
+ p->post_deps[i].syncobj =
+ drm_syncobj_find(p->filp, deps[i].handle);
+ if (!p->post_deps[i].syncobj)
+ return -EINVAL;
+ p->post_deps[i].chain = NULL;
+ p->post_deps[i].point = 0;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
+ unsigned int num_deps;
+ int i;
+
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+ dep->chain = NULL;
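+ /* A timeline point needs a fence chain node; point 0 is handled like
+ * a binary syncobj and simply has its fence replaced on signal.
+ */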
+ if (syncobj_deps[i].point) {
+ dep->chain = dma_fence_chain_alloc();
+ if (!dep->chain)
+ return -ENOMEM;
+ }
+
+ dep->syncobj = drm_syncobj_find(p->filp,
+ syncobj_deps[i].handle);
+ if (!dep->syncobj) {
+ dma_fence_chain_free(dep->chain);
+ return -EINVAL;
+ }
+ dep->point = syncobj_deps[i].point;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ int i;
+
+ if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+ return -EINVAL;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;
+ p->jobs[i]->init_shadow =
+ shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
+{
+ unsigned int ce_preempt = 0, de_preempt = 0;
+ int i, r;
+
+ for (i = 0; i < p->nchunks; ++i) {
+ struct amdgpu_cs_chunk *chunk;
+
+ chunk = &p->chunks[i];
+
+ switch (chunk->chunk_id) {
+ case AMDGPU_CHUNK_ID_IB:
+ r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+ r = amdgpu_cs_p2_dependencies(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ r = amdgpu_cs_p2_syncobj_in(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ r = amdgpu_cs_p2_syncobj_out(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ r = amdgpu_cs_p2_shadow(p, chunk);
+ if (r)
+ return r;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* Convert microseconds to bytes. */
+static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
+{
+ if (us <= 0 || !adev->mm_stats.log2_max_MBps)
+ return 0;
+
+ /* Since accum_us is incremented by a million per second, just
+ * multiply it by the number of MB/s to get the number of bytes.
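+ * E.g. with log2_max_MBps == 6 (64 MB/s), one second of accumulated
+ * time (1000000 us) converts to 1000000 << 6 = 64000000 bytes.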
+ */
+ return us << adev->mm_stats.log2_max_MBps;
+}
+
+static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
+{
+ if (!adev->mm_stats.log2_max_MBps)
+ return 0;
+
+ return bytes >> adev->mm_stats.log2_max_MBps;
+}
+
+/* Returns how many bytes TTM can move right now. If no bytes can be moved,
+ * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
+ * which means it can go over the threshold once. If that happens, the driver
+ * will be in debt and no other buffer migrations can be done until that debt
+ * is repaid.
+ *
+ * This approach allows moving a buffer of any size (it's important to allow
+ * that).
+ *
+ * The currency is simply time in microseconds and it increases as the clock
+ * ticks. The accumulated microseconds (us) are converted to bytes and
+ * returned.
+ */
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+ u64 *max_bytes,
+ u64 *max_vis_bytes)
+{
+ s64 time_us, increment_us;
+ u64 free_vram, total_vram, used_vram;
+ /* Allow a maximum of 200 accumulated ms. This is basically per-IB
+ * throttling.
+ *
+ * It means that in order to get full max MBps, at least 5 IBs per
+ * second must be submitted and not more than 200ms apart from each
+ * other.
+ */
+ const s64 us_upper_bound = 200000;
+
+ if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) {
+ *max_bytes = 0;
+ *max_vis_bytes = 0;
+ return;
+ }
+
+ total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
+ used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
+ free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
+
+ spin_lock(&adev->mm_stats.lock);
+
+ /* Increase the amount of accumulated us. */
+ time_us = ktime_to_us(ktime_get());
+ increment_us = time_us - adev->mm_stats.last_update_us;
+ adev->mm_stats.last_update_us = time_us;
+ adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
+ us_upper_bound);
+
+ /* This prevents the short period of low performance when the VRAM
+ * usage is low and the driver is in debt or doesn't have enough
+ * accumulated us to fill VRAM quickly.
+ *
+ * The situation can occur in these cases:
+ * - a lot of VRAM is freed by userspace
+ * - the presence of a big buffer causes a lot of evictions
+ * (solution: split buffers into smaller ones)
+ *
+ * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
+ * accum_us to a positive number.
+ */
+ if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
+ s64 min_us;
+
+ /* Be more aggressive on dGPUs. Try to fill a portion of free
+ * VRAM now.
+ */
+ if (!(adev->flags & AMD_IS_APU))
+ min_us = bytes_to_us(adev, free_vram / 4);
+ else
+ min_us = 0; /* Reset accum_us on APUs. */
+
+ adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
+ }
+
+ /* This is set to 0 if the driver is in debt to disallow (optional)
+ * buffer moves.
+ */
+ *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+ /* Do the same for visible VRAM if half of it is free */
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
+ u64 total_vis_vram = adev->gmc.visible_vram_size;
+ u64 used_vis_vram =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+
+ if (used_vis_vram < total_vis_vram) {
+ u64 free_vis_vram = total_vis_vram - used_vis_vram;
+
+ adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+ increment_us, us_upper_bound);
+
+ if (free_vis_vram >= total_vis_vram / 2)
+ adev->mm_stats.accum_us_vis =
+ max(bytes_to_us(adev, free_vis_vram / 2),
+ adev->mm_stats.accum_us_vis);
+ }
+
+ *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+ } else {
+ *max_vis_bytes = 0;
+ }
+
+ spin_unlock(&adev->mm_stats.lock);
+}
+
+/* Report how many bytes have really been moved for the last command
+ * submission. This can result in a debt that can stop buffer migrations
+ * temporarily.
+ */
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+ u64 num_vis_bytes)
+{
+ spin_lock(&adev->mm_stats.lock);
+ adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+ adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
+ spin_unlock(&adev->mm_stats.lock);
+}
+
+static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_cs_parser *p = param;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ .resv = bo->tbo.base.resv
+ };
+ uint32_t domain;
+ int r;
+
+ if (bo->tbo.pin_count)
+ return 0;
+
+ /* Don't move this buffer if we have depleted our allowance
+ * to move it. Don't move anything if the threshold is zero.
+ */
+ if (p->bytes_moved < p->bytes_moved_threshold &&
+ (!bo->tbo.base.dma_buf ||
+ list_empty(&bo->tbo.base.dma_buf->attachments))) {
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+ /* And don't move a CPU_ACCESS_REQUIRED BO to limited
+ * visible VRAM if we've depleted our allowance to do
+ * that.
+ */
+ if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+ domain = bo->preferred_domains;
+ else
+ domain = bo->allowed_domains;
+ } else {
+ domain = bo->preferred_domains;
+ }
+ } else {
+ domain = bo->allowed_domains;
+ }
+
+retry:
+ amdgpu_bo_placement_from_domain(bo, domain);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ p->bytes_moved += ctx.bytes_moved;
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ p->bytes_moved_vis += ctx.bytes_moved;
+
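+ /* If validation in the preferred domains ran out of memory, retry
+ * once with the full set of allowed domains.
+ */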
+ if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
+ domain = bo->allowed_domains;
+ goto retry;
+ }
+
+ return r;
+}
+
+static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_list_entry *e;
+ struct drm_gem_object *obj;
+ unsigned long index;
+ unsigned int i;
+ int r;
+
+ /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+ if (cs->in.bo_list_handle) {
+ if (p->bo_list)
+ return -EINVAL;
+
+ r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+ &p->bo_list);
+ if (r)
+ return r;
+ } else if (!p->bo_list) {
+ /* Create an empty bo_list when no handle is provided */
+ r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+ &p->bo_list);
+ if (r)
+ return r;
+ }
+
+ mutex_lock(&p->bo_list->bo_list_mutex);
+
+ /* Get userptr backing pages. If the pages are updated after being
+ * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
+ * will do amdgpu_ttm_backend_bind() to flush and invalidate the new pages
+ */
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ bool userpage_invalidated = false;
+ struct amdgpu_bo *bo = e->bo;
+
+ e->range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!e->range))
+ return -ENOMEM;
+
+ r = amdgpu_ttm_tt_get_user_pages(bo, e->range);
+ if (r)
+ goto out_free_user_pages;
+
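+ /* Compare the pages currently backing the ttm with the pages just
+ * faulted in to detect whether the userptr was invalidated meanwhile.
+ */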
+ for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+ if (bo->tbo.ttm->pages[i] !=
+ hmm_pfn_to_page(e->range->hmm_range.hmm_pfns[i])) {
+ userpage_invalidated = true;
+ break;
+ }
+ }
+ e->user_invalidated = userpage_invalidated;
+ }
+
+ drm_exec_until_all_locked(&p->exec) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ /* One fence for TTM and one for each CS job */
+ r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
+ 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
+ }
+
+ if (p->uf_bo) {
+ r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
+ 1 + p->gang_size);
+ drm_exec_retry_on_contention(&p->exec);
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
+ }
+
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct mm_struct *usermm;
+
+ usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
+ if (usermm && usermm != current->mm) {
+ r = -EPERM;
+ goto out_free_user_pages;
+ }
+
+ if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
+ e->user_invalidated) {
+ amdgpu_bo_placement_from_domain(e->bo,
+ AMDGPU_GEM_DOMAIN_CPU);
+ r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
+ &ctx);
+ if (r)
+ goto out_free_user_pages;
+
+ amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
+ e->range);
+ }
+ }
+
+ amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+ &p->bytes_moved_vis_threshold);
+ p->bytes_moved = 0;
+ p->bytes_moved_vis = 0;
+
+ r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
+ amdgpu_cs_bo_validate, p);
+ if (r) {
+ drm_err(adev_to_drm(p->adev), "amdgpu_vm_validate() failed.\n");
+ goto out_free_user_pages;
+ }
+
+ drm_exec_for_each_locked_object(&p->exec, index, obj) {
+ r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
+ if (unlikely(r))
+ goto out_free_user_pages;
+ }
+
+ if (p->uf_bo) {
+ r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
+ if (unlikely(r))
+ goto out_free_user_pages;
+
+ p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
+ }
+
+ amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+ p->bytes_moved_vis);
+
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
+ p->bo_list->gws_obj,
+ p->bo_list->oa_obj);
+ return 0;
+
+out_free_user_pages:
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ amdgpu_hmm_range_free(e->range);
+ e->range = NULL;
+ }
+ mutex_unlock(&p->bo_list->bo_list_mutex);
+ return r;
+}
+
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
+{
+ int i, j;
+
+ if (!trace_amdgpu_cs_enabled())
+ return;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ struct amdgpu_job *job = p->jobs[i];
+
+ for (j = 0; j < job->num_ibs; ++j)
+ trace_amdgpu_cs(p, job, &job->ibs[j]);
+ }
+}
+
+static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ unsigned int i;
+ int r;
+
+ /* Only for UVD/VCE VM emulation */
+ if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
+ return 0;
+
+ for (i = 0; i < job->num_ibs; ++i) {
+ struct amdgpu_ib *ib = &job->ibs[i];
+ struct amdgpu_bo_va_mapping *m;
+ struct amdgpu_bo *aobj;
+ uint64_t va_start;
+ uint8_t *kptr;
+
+ va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+ if (r) {
+ drm_err(adev_to_drm(p->adev), "IB va_start is invalid\n");
+ return r;
+ }
+
+ if ((va_start + ib->length_dw * 4) >
+ (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ drm_err(adev_to_drm(p->adev), "IB va_start+ib_bytes is invalid\n");
+ return -EINVAL;
+ }
+
+ /* the IB should be reserved at this point */
+ r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+ if (r)
+ return r;
+
+ kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
+
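+ /* parse_cs copies the IB into a kernel-owned buffer and validates it
+ * there, while patch_cs_in_place fixes up the mapped user IB directly.
+ */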
+ if (ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, ib->length_dw * 4);
+ amdgpu_bo_kunmap(aobj);
+
+ r = amdgpu_ring_parse_cs(ring, p, job, ib);
+ if (r)
+ return r;
+
+ if (ib->sa_bo)
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
+{
+ unsigned int i;
+ int r;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *job = p->gang_leader;
+ struct amdgpu_device *adev = p->adev;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_list_entry *e;
+ struct amdgpu_bo_va *bo_va;
+ unsigned int i;
+ int r;
+
+ /*
+ * We can't use gang submit together with reserved VMIDs when the VM
+ * changes can't be invalidated by more than one engine at the same time.
+ */
+ if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
+ for (i = 0; i < p->gang_size; ++i) {
+ struct drm_sched_entity *entity = p->entities[i];
+ struct drm_gpu_scheduler *sched = entity->rq->sched;
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched);
+
+ if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
+ return -EINVAL;
+ }
+ }
+
+ if (!amdgpu_vm_ready(vm))
+ return -EINVAL;
+
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
+ GFP_KERNEL);
+ if (r)
+ return r;
+
+ if (fpriv->csa_va) {
+ bo_va = fpriv->csa_va;
+ BUG_ON(!bo_va);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
+ if (r)
+ return r;
+ }
+
+ /* FIXME: In theory this loop shouldn't be needed any more when
+ * amdgpu_vm_handle_moved handles all moved BOs that are reserved
+ * with p->ticket. But removing it caused test regressions, so I'm
+ * leaving it here for now.
+ */
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ bo_va = e->bo_va;
+ if (bo_va == NULL)
+ continue;
+
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_update_pdes(adev, vm, false);
+ if (r)
+ return r;
+
+ r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
+ if (r)
+ return r;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ job = p->jobs[i];
+
+ if (!job->vm)
+ continue;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ }
+
+ if (adev->debug_vm) {
+ /* Invalidate all BOs to test for userspace bugs */
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = e->bo;
+
+ /* ignore duplicates */
+ if (!bo)
+ continue;
+
+ amdgpu_vm_bo_invalidate(bo, false);
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct drm_gpu_scheduler *sched;
+ struct drm_gem_object *obj;
+ struct dma_fence *fence;
+ unsigned long index;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
+ if (r) {
+ if (r != -ERESTARTSYS)
+ drm_err(adev_to_drm(p->adev), "amdgpu_ctx_wait_prev_fence failed.\n");
+ return r;
+ }
+
+ drm_exec_for_each_locked_object(&p->exec, index, obj) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ struct dma_resv *resv = bo->tbo.base.resv;
+ enum amdgpu_sync_mode sync_mode;
+
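+ /* BOs marked for explicit sync only wait for explicitly added
+ * dependencies, all other BOs also sync to submissions from other
+ * owners (AMDGPU_SYNC_NE_OWNER).
+ */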
+ sync_mode = amdgpu_bo_explicit_sync(bo) ?
+ AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+ r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
+ &fpriv->vm);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
+ if (r)
+ return r;
+ }
+
+ sched = p->gang_leader->base.entity->rq->sched;
+ while ((fence = amdgpu_sync_get_fence(&p->sync))) {
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+
+ /*
+ * When we have a dependency it might be necessary to insert a
+ * pipeline sync to make sure that all caches etc. are flushed and the
+ * next job actually sees the results from the previous one
+ * before we start executing on the same scheduler ring.
+ */
+ if (!s_fence || s_fence->sched != sched) {
+ dma_fence_put(fence);
+ continue;
+ }
+
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
+ GFP_KERNEL);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+ int i;
+
+ for (i = 0; i < p->num_post_deps; ++i) {
+ if (p->post_deps[i].chain && p->post_deps[i].point) {
+ drm_syncobj_add_point(p->post_deps[i].syncobj,
+ p->post_deps[i].chain,
+ p->fence, p->post_deps[i].point);
+ p->post_deps[i].chain = NULL;
+ } else {
+ drm_syncobj_replace_fence(p->post_deps[i].syncobj,
+ p->fence);
+ }
+ }
+}
+
+static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *leader = p->gang_leader;
+ struct amdgpu_bo_list_entry *e;
+ struct drm_gem_object *gobj;
+ unsigned long index;
+ unsigned int i;
+ uint64_t seq;
+ int r;
+
+ for (i = 0; i < p->gang_size; ++i)
+ drm_sched_job_arm(&p->jobs[i]->base);
+
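+ /* Make the gang leader depend on the scheduled fences of all other
+ * gang members so it isn't pushed to the hardware before they are
+ * scheduled.
+ */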
+ for (i = 0; i < p->gang_size; ++i) {
+ struct dma_fence *fence;
+
+ if (p->jobs[i] == leader)
+ continue;
+
+ fence = &p->jobs[i]->base.s_fence->scheduled;
+ dma_fence_get(fence);
+ r = drm_sched_job_add_dependency(&leader->base, fence);
+ if (r) {
+ dma_fence_put(fence);
+ return r;
+ }
+ }
+
+ if (p->gang_size > 1) {
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_gang_leader(p->jobs[i], leader);
+ }
+
+ /* No memory allocation is allowed while holding the notifier lock.
+ * The lock is held until amdgpu_cs_submit is finished and fence is
+ * added to BOs.
+ */
+ mutex_lock(&p->adev->notifier_lock);
+
+ /* If userptrs were invalidated after amdgpu_cs_parser_bos(), return
+ * -EAGAIN; drmIoctl() in libdrm will then restart the amdgpu_cs_ioctl.
+ */
+ r = 0;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ r |= !amdgpu_hmm_range_valid(e->range);
+ amdgpu_hmm_range_free(e->range);
+ e->range = NULL;
+ }
+ if (r) {
+ r = -EAGAIN;
+ mutex_unlock(&p->adev->notifier_lock);
+ return r;
+ }
+
+ p->fence = dma_fence_get(&leader->base.s_fence->finished);
+ drm_exec_for_each_locked_object(&p->exec, index, gobj) {
+
+ ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);
+
+ /* Everybody except for the gang leader uses READ */
+ for (i = 0; i < p->gang_size; ++i) {
+ if (p->jobs[i] == leader)
+ continue;
+
+ dma_resv_add_fence(gobj->resv,
+ &p->jobs[i]->base.s_fence->finished,
+ DMA_RESV_USAGE_READ);
+ }
+
+ /* The gang leader is remembered as the writer */
+ dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
+ }
+
+ seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
+ p->fence);
+ amdgpu_cs_post_dependencies(p);
+
+ if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ !p->ctx->preamble_presented) {
+ leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ p->ctx->preamble_presented = true;
+ }
+
+ cs->out.handle = seq;
+ leader->uf_sequence = seq;
+
+ amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
+ for (i = 0; i < p->gang_size; ++i) {
+ amdgpu_job_free_resources(p->jobs[i]);
+ trace_amdgpu_cs_ioctl(p->jobs[i]);
+ drm_sched_entity_push_job(&p->jobs[i]->base);
+ p->jobs[i] = NULL;
+ }
+
+ amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
+ mutex_unlock(&p->adev->notifier_lock);
+ mutex_unlock(&p->bo_list->bo_list_mutex);
+ return 0;
+}
+
+/* Cleanup the parser structure */
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
+{
+ unsigned int i;
+
+ amdgpu_sync_free(&parser->sync);
+ drm_exec_fini(&parser->exec);
+
+ for (i = 0; i < parser->num_post_deps; i++) {
+ drm_syncobj_put(parser->post_deps[i].syncobj);
+ kfree(parser->post_deps[i].chain);
+ }
+ kfree(parser->post_deps);
+
+ dma_fence_put(parser->fence);
+
+ if (parser->ctx)
+ amdgpu_ctx_put(parser->ctx);
+ if (parser->bo_list)
+ amdgpu_bo_list_put(parser->bo_list);
+
+ for (i = 0; i < parser->nchunks; i++)
+ kvfree(parser->chunks[i].kdata);
+ kvfree(parser->chunks);
+ for (i = 0; i < parser->gang_size; ++i) {
+ if (parser->jobs[i])
+ amdgpu_job_free(parser->jobs[i]);
+ }
+ amdgpu_bo_unref(&parser->uf_bo);
+}
+
+int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_cs_parser parser;
+ int r;
+
+ if (amdgpu_ras_intr_triggered())
+ return -EHWPOISON;
+
+ if (!adev->accel_working)
+ return -EBUSY;
+
+ r = amdgpu_cs_parser_init(&parser, adev, filp, data);
+ if (r) {
+ drm_err_ratelimited(dev, "Failed to initialize parser %d!\n", r);
+ return r;
+ }
+
+ r = amdgpu_cs_pass1(&parser, data);
+ if (r)
+ goto error_fini;
+
+ r = amdgpu_cs_pass2(&parser);
+ if (r)
+ goto error_fini;
+
+ r = amdgpu_cs_parser_bos(&parser, data);
+ if (r) {
+ if (r == -ENOMEM)
+ drm_err(dev, "Not enough memory for command submission!\n");
+ else if (r != -ERESTARTSYS && r != -EAGAIN)
+ drm_dbg(dev, "Failed to process the buffer list %d!\n", r);
+ goto error_fini;
+ }
+
+ r = amdgpu_cs_patch_jobs(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_vm_handling(&parser);
+ if (r)
+ goto error_backoff;
+
+ r = amdgpu_cs_sync_rings(&parser);
+ if (r)
+ goto error_backoff;
+
+ trace_amdgpu_cs_ibs(&parser);
+
+ r = amdgpu_cs_submit(&parser, data);
+ if (r)
+ goto error_backoff;
+
+ amdgpu_cs_parser_fini(&parser);
+ return 0;
+
+error_backoff:
+ mutex_unlock(&parser.bo_list->bo_list_mutex);
+
+error_fini:
+ amdgpu_cs_parser_fini(&parser);
+ return r;
+}
+
+/**
+ * amdgpu_cs_wait_ioctl - wait for a command submission to finish
+ *
+ * @dev: drm device
+ * @data: data from userspace
+ * @filp: file private
+ *
+ * Wait for the command submission identified by handle to finish.
+ */
+int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_wait_cs *wait = data;
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
+ struct drm_sched_entity *entity;
+ struct amdgpu_ctx *ctx;
+ struct dma_fence *fence;
+ long r;
+
+ ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+ wait->in.ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return r;
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
+ if (IS_ERR(fence))
+ r = PTR_ERR(fence);
+ else if (fence) {
+ r = dma_fence_wait_timeout(fence, true, timeout);
+ if (r > 0 && fence->error)
+ r = fence->error;
+ dma_fence_put(fence);
+ } else
+ r = 1;
+
+ amdgpu_ctx_put(ctx);
+ if (r < 0)
+ return r;
+
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r == 0);
+
+ return 0;
+}
+
+/**
+ * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @user: drm_amdgpu_fence copied from user space
+ */
+static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_fence *user)
+{
+ struct drm_sched_entity *entity;
+ struct amdgpu_ctx *ctx;
+ struct dma_fence *fence;
+ int r;
+
+ ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
+ if (ctx == NULL)
+ return ERR_PTR(-EINVAL);
+
+ r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+ user->ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return ERR_PTR(r);
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
+ amdgpu_ctx_put(ctx);
+
+ return fence;
+}
+
+int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union drm_amdgpu_fence_to_handle *info = data;
+ struct dma_fence *fence;
+ struct drm_syncobj *syncobj;
+ struct sync_file *sync_file;
+ int fd, r;
+
+ fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+
+ if (!fence)
+ fence = dma_fence_get_stub();
+
+ switch (info->in.what) {
+ case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
+ r = drm_syncobj_create(&syncobj, 0, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
+ drm_syncobj_put(syncobj);
+ return r;
+
+ case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
+ r = drm_syncobj_create(&syncobj, 0, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
+ drm_syncobj_put(syncobj);
+ return r;
+
+ case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0) {
+ dma_fence_put(fence);
+ return fd;
+ }
+
+ sync_file = sync_file_create(fence);
+ dma_fence_put(fence);
+ if (!sync_file) {
+ put_unused_fd(fd);
+ return -ENOMEM;
+ }
+
+ fd_install(fd, sync_file->file);
+ info->out.handle = fd;
+ return 0;
+
+ default:
+ dma_fence_put(fence);
+ return -EINVAL;
+ }
+}
+
+/**
+ * amdgpu_cs_wait_all_fences - wait on all fences to signal
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @wait: wait parameters
+ * @fences: array of drm_amdgpu_fence
+ */
+static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_wait_fences *wait,
+ struct drm_amdgpu_fence *fences)
+{
+ uint32_t fence_count = wait->in.fence_count;
+ unsigned int i;
+ long r = 1;
+
+ for (i = 0; i < fence_count; i++) {
+ struct dma_fence *fence;
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+
+ fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ else if (!fence)
+ continue;
+
+ r = dma_fence_wait_timeout(fence, true, timeout);
+ if (r > 0 && fence->error)
+ r = fence->error;
+
+ dma_fence_put(fence);
+ if (r < 0)
+ return r;
+
+ if (r == 0)
+ break;
+ }
+
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r > 0);
+
+ return 0;
+}
+
+/**
+ * amdgpu_cs_wait_any_fence - wait on any fence to signal
+ *
+ * @adev: amdgpu device
+ * @filp: file private
+ * @wait: wait parameters
+ * @fences: array of drm_amdgpu_fence
+ */
+static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_wait_fences *wait,
+ struct drm_amdgpu_fence *fences)
+{
+ unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
+ uint32_t fence_count = wait->in.fence_count;
+ uint32_t first = ~0;
+ struct dma_fence **array;
+ unsigned int i;
+ long r;
+
+ /* Prepare the fence array */
+ array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
+
+ if (array == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < fence_count; i++) {
+ struct dma_fence *fence;
+
+ fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
+ if (IS_ERR(fence)) {
+ r = PTR_ERR(fence);
+ goto err_free_fence_array;
+ } else if (fence) {
+ array[i] = fence;
+ } else { /* NULL, the fence has already been signaled */
+ r = 1;
+ first = i;
+ goto out;
+ }
+ }
+
+ r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
+ &first);
+ if (r < 0)
+ goto err_free_fence_array;
+
+out:
+ memset(wait, 0, sizeof(*wait));
+ wait->out.status = (r > 0);
+ wait->out.first_signaled = first;
+
+ if (first < fence_count && array[first])
+ r = array[first]->error;
+ else
+ r = 0;
+
+err_free_fence_array:
+ for (i = 0; i < fence_count; i++)
+ dma_fence_put(array[i]);
+ kfree(array);
+
+ return r;
+}
+
+/**
+ * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
+ *
+ * @dev: drm device
+ * @data: data from userspace
+ * @filp: file private
+ */
+int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union drm_amdgpu_wait_fences *wait = data;
+ struct drm_amdgpu_fence *fences;
+ int r;
+
+ /* Get the fences from userspace */
+ fences = memdup_array_user(u64_to_user_ptr(wait->in.fences),
+ wait->in.fence_count,
+ sizeof(struct drm_amdgpu_fence));
+ if (IS_ERR(fences))
+ return PTR_ERR(fences);
+
+ if (wait->in.wait_all)
+ r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
+ else
+ r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
+
+ kfree(fences);
+
+ return r;
+}
+
+/**
+ * amdgpu_cs_find_mapping - find bo_va for VM address
+ *
+ * @parser: command submission parser context
+ * @addr: VM address
+ * @bo: resulting BO of the mapping found
+ * @map: Placeholder to return found BO mapping
+ *
+ * Search the buffer objects in the command submission context for a certain
+ * virtual memory address. Returns 0 and fills in @bo and @map when the
+ * mapping is found, a negative error code otherwise.
+ */
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+ uint64_t addr, struct amdgpu_bo **bo,
+ struct amdgpu_bo_va_mapping **map)
+{
+ struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_va_mapping *mapping;
+ int i, r;
+
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+ if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+ return -EINVAL;
+
+ *bo = mapping->bo_va->base.bo;
+ *map = mapping;
+
+ /* Double check that the BO is reserved by this CS */
+ if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
+ return -EINVAL;
+
+ /* Make sure VRAM is allocated contiguously */
+ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
+ !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+
+ amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+ for (i = 0; i < (*bo)->placement.num_placement; i++)
+ (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
+ r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
new file mode 100644
index 000000000000..39c33ad100cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CS_H__
+#define __AMDGPU_CS_H__
+
+#include <linux/ww_mutex.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_job.h"
+#include "amdgpu_bo_list.h"
+#include "amdgpu_ring.h"
+
+#define AMDGPU_CS_GANG_SIZE 4
+
+struct amdgpu_bo_va_mapping;
+
+struct amdgpu_cs_chunk {
+ uint32_t chunk_id;
+ uint32_t length_dw;
+ void *kdata;
+};
+
+struct amdgpu_cs_post_dep {
+ struct drm_syncobj *syncobj;
+ struct dma_fence_chain *chain;
+ u64 point;
+};
+
+struct amdgpu_cs_parser {
+ struct amdgpu_device *adev;
+ struct drm_file *filp;
+ struct amdgpu_ctx *ctx;
+
+ /* chunks */
+ unsigned nchunks;
+ struct amdgpu_cs_chunk *chunks;
+
+ /* scheduler job objects */
+ unsigned int gang_size;
+ unsigned int gang_leader_idx;
+ struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE];
+ struct amdgpu_job *gang_leader;
+
+ /* buffer objects */
+ struct drm_exec exec;
+ struct amdgpu_bo_list *bo_list;
+ struct amdgpu_mn *mn;
+ struct dma_fence *fence;
+ uint64_t bytes_moved_threshold;
+ uint64_t bytes_moved_vis_threshold;
+ uint64_t bytes_moved;
+ uint64_t bytes_moved_vis;
+
+ /* user fence */
+ struct amdgpu_bo *uf_bo;
+
+ unsigned num_post_deps;
+ struct amdgpu_cs_post_dep *post_deps;
+
+ struct amdgpu_sync sync;
+};
+
+int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
+ uint64_t addr, struct amdgpu_bo **bo,
+ struct amdgpu_bo_va_mapping **mapping);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
new file mode 100644
index 000000000000..02138aa55793
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Monk.liu@amd.com
+ */
+
+#include <drm/drm_exec.h>
+
+#include "amdgpu.h"
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
+{
+ uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev);
+
+ addr = amdgpu_gmc_sign_extend(addr);
+
+ return addr;
+}
+
+int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
+ u32 domain, uint32_t size)
+{
+ void *ptr;
+
+ amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ domain, bo,
+ NULL, &ptr);
+ if (!*bo)
+ return -ENOMEM;
+
+ memset(ptr, 0, size);
+ adev->virt.csa_cpu_addr = ptr;
+ return 0;
+}
+
+void amdgpu_free_static_csa(struct amdgpu_bo **bo)
+{
+ amdgpu_bo_free_kernel(bo, NULL, NULL);
+}
+
+/*
+ * amdgpu_map_static_csa should be called during amdgpu_vm_init. It maps
+ * the virtual address amdgpu_csa_vaddr() into this VM, and each GFX
+ * command submission should use this virtual address within the META_DATA
+ * init package to support SRIOV gfx preemption.
+ */
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
+ uint64_t csa_addr, uint32_t size)
+{
+ struct drm_exec exec;
+ int r;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ goto error;
+ }
+ }
+
+ *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+ if (!*bo_va) {
+ r = -ENOMEM;
+ goto error;
+ }
+
+ r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
+ AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+ AMDGPU_PTE_EXECUTABLE);
+
+ if (r) {
+ DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
+ }
+
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
+
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr)
+{
+ struct drm_exec exec;
+ int r;
+
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
+ goto error;
+ }
+ }
+
+ r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr);
+ if (r) {
+ DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r);
+ goto error;
+ }
+
+ amdgpu_vm_bo_del(adev, bo_va);
+
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
new file mode 100644
index 000000000000..7dfc1f2012eb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Monk.liu@amd.com
+ */
+
+#ifndef AMDGPU_CSA_MANAGER_H
+#define AMDGPU_CSA_MANAGER_H
+
+#define AMDGPU_CSA_SIZE (128 * 1024)
+
+uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev);
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
+int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
+ u32 domain, uint32_t size);
+int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
+ uint64_t csa_addr, uint32_t size);
+int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
+ uint64_t csa_addr);
+void amdgpu_free_static_csa(struct amdgpu_bo **bo);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
new file mode 100644
index 000000000000..afedea02188d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -0,0 +1,992 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: monk liu <monk.liu@amd.com>
+ */
+
+#include <drm/drm_auth.h>
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_sched.h"
+#include "amdgpu_ras.h"
+#include <linux/nospec.h>
+
+#define to_amdgpu_ctx_entity(e) \
+ container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = 1,
+ [AMDGPU_HW_IP_COMPUTE] = 4,
+ [AMDGPU_HW_IP_DMA] = 2,
+ [AMDGPU_HW_IP_UVD] = 1,
+ [AMDGPU_HW_IP_VCE] = 1,
+ [AMDGPU_HW_IP_UVD_ENC] = 1,
+ [AMDGPU_HW_IP_VCN_DEC] = 1,
+ [AMDGPU_HW_IP_VCN_ENC] = 1,
+ [AMDGPU_HW_IP_VCN_JPEG] = 1,
+ [AMDGPU_HW_IP_VPE] = 1,
+};
+
+bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio)
+{
+ switch (ctx_prio) {
+ case AMDGPU_CTX_PRIORITY_VERY_LOW:
+ case AMDGPU_CTX_PRIORITY_LOW:
+ case AMDGPU_CTX_PRIORITY_NORMAL:
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return true;
+ default:
+ case AMDGPU_CTX_PRIORITY_UNSET:
+ /* UNSET priority is not valid and we don't carry that
+ * around, but set it to NORMAL in the only place this
+ * function is called, amdgpu_ctx_ioctl().
+ */
+ return false;
+ }
+}
+
+static enum drm_sched_priority
+amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
+{
+ switch (ctx_prio) {
+ case AMDGPU_CTX_PRIORITY_UNSET:
+ pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL");
+ return DRM_SCHED_PRIORITY_NORMAL;
+
+ case AMDGPU_CTX_PRIORITY_VERY_LOW:
+ return DRM_SCHED_PRIORITY_LOW;
+
+ case AMDGPU_CTX_PRIORITY_LOW:
+ return DRM_SCHED_PRIORITY_LOW;
+
+ case AMDGPU_CTX_PRIORITY_NORMAL:
+ return DRM_SCHED_PRIORITY_NORMAL;
+
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ return DRM_SCHED_PRIORITY_HIGH;
+
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return DRM_SCHED_PRIORITY_HIGH;
+
+ /* This should not happen as we already sanitized the userspace
+ * provided priority, WARN if it does.
+ */
+ default:
+ WARN(1, "Invalid context priority %d\n", ctx_prio);
+ return DRM_SCHED_PRIORITY_NORMAL;
+ }
+
+}
+
+static int amdgpu_ctx_priority_permit(struct drm_file *filp,
+ int32_t priority)
+{
+ /* NORMAL and below are accessible by everyone */
+ if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
+ return 0;
+
+ if (capable(CAP_SYS_NICE))
+ return 0;
+
+ if (drm_is_current_master(filp))
+ return 0;
+
+ return -EACCES;
+}
+
+static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
+{
+ switch (prio) {
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return AMDGPU_GFX_PIPE_PRIO_HIGH;
+ default:
+ return AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ }
+}
+
+static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_t prio)
+{
+ switch (prio) {
+ case AMDGPU_CTX_PRIORITY_HIGH:
+ return AMDGPU_RING_PRIO_1;
+ case AMDGPU_CTX_PRIORITY_VERY_HIGH:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
+
+static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ unsigned int hw_prio;
+ int32_t ctx_prio;
+
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+
+ switch (hw_ip) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_HW_IP_COMPUTE:
+ hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
+ break;
+ case AMDGPU_HW_IP_VCE:
+ case AMDGPU_HW_IP_VCN_ENC:
+ hw_prio = amdgpu_ctx_sched_prio_to_ring_prio(ctx_prio);
+ break;
+ default:
+ hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+ break;
+ }
+
+ hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+ if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
+ hw_prio = AMDGPU_RING_PRIO_DEFAULT;
+
+ return hw_prio;
+}
+
+/* Calculate the time spent on the hw */
+static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
+{
+ struct drm_sched_fence *s_fence;
+
+ if (!fence)
+ return ns_to_ktime(0);
+
+ /* When the fence is not even scheduled it can't have spent any time */
+ s_fence = to_drm_sched_fence(fence);
+ if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
+ return ns_to_ktime(0);
+
+ /* When it is still running account how much time was already spent */
+ if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
+ return ktime_sub(ktime_get(), s_fence->scheduled.timestamp);
+
+ return ktime_sub(s_fence->finished.timestamp,
+ s_fence->scheduled.timestamp);
+}
+
+static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
+ struct amdgpu_ctx_entity *centity)
+{
+ ktime_t res = ns_to_ktime(0);
+ uint32_t i;
+
+ spin_lock(&ctx->ring_lock);
+ for (i = 0; i < amdgpu_sched_jobs; i++) {
+ res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i]));
+ }
+ spin_unlock(&ctx->ring_lock);
+ return res;
+}
+
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
+ const u32 ring)
+{
+ struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ struct amdgpu_ctx_entity *entity;
+ enum drm_sched_priority drm_prio;
+ unsigned int hw_prio, num_scheds;
+ int32_t ctx_prio;
+ int r;
+
+ entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
+ GFP_KERNEL);
+ if (!entity)
+ return -ENOMEM;
+
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ entity->hw_ip = hw_ip;
+ entity->sequence = 1;
+ hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
+ drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
+
+ hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
+
+ if (!(adev)->xcp_mgr) {
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ } else {
+ struct amdgpu_fpriv *fpriv;
+
+ fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr);
+ r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv,
+ &num_scheds, &scheds);
+ if (r)
+ goto error_free_entity;
+ }
+
+ /* disable load balancing if the hw engine retains context among dependent jobs */
+ if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
+ hw_ip == AMDGPU_HW_IP_VCN_DEC ||
+ hw_ip == AMDGPU_HW_IP_UVD_ENC ||
+ hw_ip == AMDGPU_HW_IP_UVD) {
+ sched = drm_sched_pick_best(scheds, num_scheds);
+ scheds = &sched;
+ num_scheds = 1;
+ }
+
+ r = drm_sched_entity_init(&entity->entity, drm_prio, scheds, num_scheds,
+ &ctx->guilty);
+ if (r)
+ goto error_free_entity;
+
+ /* It's not an error if we fail to install the new entity */
+ if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity))
+ goto cleanup_entity;
+
+ return 0;
+
+cleanup_entity:
+ drm_sched_entity_fini(&entity->entity);
+
+error_free_entity:
+ kfree(entity);
+
+ return r;
+}
+
+static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
+{
+ ktime_t res = ns_to_ktime(0);
+ int i;
+
+ if (!entity)
+ return res;
+
+ for (i = 0; i < amdgpu_sched_jobs; ++i) {
+ res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i]));
+ dma_fence_put(entity->fences[i]);
+ }
+
+ amdgpu_xcp_release_sched(adev, entity);
+
+ kfree(entity);
+ return res;
+}
+
+static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 *stable_pstate)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ enum amd_dpm_forced_level current_level;
+
+ current_level = amdgpu_dpm_get_performance_level(adev);
+
+ switch (current_level) {
+ case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK;
+ break;
+ default:
+ *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
+ break;
+ }
+ return 0;
+}
+
+static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
+ struct drm_file *filp, struct amdgpu_ctx *ctx)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ u32 current_stable_pstate;
+ int r;
+
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ kref_init(&ctx->refcount);
+ ctx->mgr = mgr;
+ spin_lock_init(&ctx->ring_lock);
+
+ ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm);
+ ctx->init_priority = priority;
+ ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
+
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r)
+ return r;
+
+ if (mgr->adev->pm.stable_pstate_ctx)
+ ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
+ else
+ ctx->stable_pstate = current_stable_pstate;
+
+ ctx->ctx_mgr = &(fpriv->ctx_mgr);
+ return 0;
+}
+
+static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
+ u32 stable_pstate)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ enum amd_dpm_forced_level level;
+ u32 current_stable_pstate;
+ int r;
+
+ mutex_lock(&adev->pm.stable_pstate_ctx_lock);
+ if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) {
+ r = -EBUSY;
+ goto done;
+ }
+
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r || (stable_pstate == current_stable_pstate))
+ goto done;
+
+ switch (stable_pstate) {
+ case AMDGPU_CTX_STABLE_PSTATE_NONE:
+ level = AMD_DPM_FORCED_LEVEL_AUTO;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_STANDARD:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK;
+ break;
+ case AMDGPU_CTX_STABLE_PSTATE_PEAK:
+ level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK;
+ break;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+ r = amdgpu_dpm_force_performance_level(adev, level);
+
+ if (level == AMD_DPM_FORCED_LEVEL_AUTO)
+ adev->pm.stable_pstate_ctx = NULL;
+ else
+ adev->pm.stable_pstate_ctx = ctx;
+done:
+ mutex_unlock(&adev->pm.stable_pstate_ctx_lock);
+
+ return r;
+}
+
+static void amdgpu_ctx_fini(struct kref *ref)
+{
+ struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ struct amdgpu_ctx_mgr *mgr = ctx->mgr;
+ struct amdgpu_device *adev = mgr->adev;
+ unsigned i, j, idx;
+
+ if (!adev)
+ return;
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+ ktime_t spend;
+
+ spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]);
+ atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
+ }
+ }
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
+ drm_dev_exit(idx);
+ }
+
+ kfree(ctx);
+}
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+ u32 ring, struct drm_sched_entity **entity)
+{
+ int r;
+ struct drm_sched_entity *ctx_entity;
+
+ if (hw_ip >= AMDGPU_HW_IP_NUM) {
+ DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+ return -EINVAL;
+ }
+
+ /* Right now all IPs have only one instance but multiple rings. */
+ if (instance != 0) {
+ DRM_DEBUG("invalid ip instance: %d\n", instance);
+ return -EINVAL;
+ }
+
+ if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+ DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+ return -EINVAL;
+ }
+
+ if (ctx->entities[hw_ip][ring] == NULL) {
+ r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+ if (r)
+ return r;
+ }
+
+ ctx_entity = &ctx->entities[hw_ip][ring]->entity;
+ r = drm_sched_entity_error(ctx_entity);
+ if (r) {
+ DRM_DEBUG("error entity %p\n", ctx_entity);
+ return r;
+ }
+
+ *entity = ctx_entity;
+ return 0;
+}
+
+static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *filp,
+ int32_t priority,
+ uint32_t *id)
+{
+ struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
+ struct amdgpu_ctx *ctx;
+ int r;
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ mutex_lock(&mgr->lock);
+ r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
+ if (r < 0) {
+ mutex_unlock(&mgr->lock);
+ kfree(ctx);
+ return r;
+ }
+
+ *id = (uint32_t)r;
+ r = amdgpu_ctx_init(mgr, priority, filp, ctx);
+ if (r) {
+ idr_remove(&mgr->ctx_handles, *id);
+ *id = 0;
+ kfree(ctx);
+ }
+ mutex_unlock(&mgr->lock);
+ return r;
+}
+
+static void amdgpu_ctx_do_release(struct kref *ref)
+{
+ struct amdgpu_ctx *ctx;
+ u32 i, j;
+
+ ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
+
+ drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+ }
+ }
+
+ amdgpu_ctx_fini(ref);
+}
+
+static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
+{
+ struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
+ struct amdgpu_ctx *ctx;
+
+ mutex_lock(&mgr->lock);
+ ctx = idr_remove(&mgr->ctx_handles, id);
+ if (ctx)
+ kref_put(&ctx->refcount, amdgpu_ctx_do_release);
+ mutex_unlock(&mgr->lock);
+ return ctx ? 0 : -EINVAL;
+}
+
+static int amdgpu_ctx_query(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ union drm_amdgpu_ctx_out *out)
+{
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+ unsigned reset_counter;
+
+ if (!fpriv)
+ return -EINVAL;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (!ctx) {
+ mutex_unlock(&mgr->lock);
+ return -EINVAL;
+ }
+
+ /* TODO: these two are always zero */
+ out->state.flags = 0x0;
+ out->state.hangs = 0x0;
+
+ /* determine if a GPU reset has occurred since the last call */
+ reset_counter = atomic_read(&adev->gpu_reset_counter);
+ /* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
+ if (ctx->reset_counter_query == reset_counter)
+ out->state.reset_status = AMDGPU_CTX_NO_RESET;
+ else
+ out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
+ ctx->reset_counter_query = reset_counter;
+
+ mutex_unlock(&mgr->lock);
+ return 0;
+}
+
+#define AMDGPU_RAS_COUNTE_DELAY_MS 3000
+
+static int amdgpu_ctx_query2(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ union drm_amdgpu_ctx_out *out)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+
+ if (!fpriv)
+ return -EINVAL;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (!ctx) {
+ mutex_unlock(&mgr->lock);
+ return -EINVAL;
+ }
+
+ out->state.flags = 0x0;
+ out->state.hangs = 0x0;
+
+ if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;
+
+ if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
+
+ if (atomic_read(&ctx->guilty))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
+
+ if (amdgpu_in_reset(adev))
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS;
+
+ if (adev->ras_enabled && con) {
+ /* Return the cached values in O(1),
+ * and schedule delayed work to cache
+ * new values.
+ */
+ int ce_count, ue_count;
+
+ ce_count = atomic_read(&con->ras_ce_count);
+ ue_count = atomic_read(&con->ras_ue_count);
+
+ if (ce_count != ctx->ras_counter_ce) {
+ ctx->ras_counter_ce = ce_count;
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
+ }
+
+ if (ue_count != ctx->ras_counter_ue) {
+ ctx->ras_counter_ue = ue_count;
+ out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
+ }
+
+ schedule_delayed_work(&con->ras_counte_delay_work,
+ msecs_to_jiffies(AMDGPU_RAS_COUNTE_DELAY_MS));
+ }
+
+ mutex_unlock(&mgr->lock);
+ return 0;
+}
+
+static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv, uint32_t id,
+ bool set, u32 *stable_pstate)
+{
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+ int r;
+
+ if (!fpriv)
+ return -EINVAL;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (!ctx) {
+ mutex_unlock(&mgr->lock);
+ return -EINVAL;
+ }
+
+ if (set)
+ r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate);
+ else
+ r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate);
+
+ mutex_unlock(&mgr->lock);
+ return r;
+}
+
+int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ int r;
+ uint32_t id, stable_pstate;
+ int32_t priority;
+
+ union drm_amdgpu_ctx *args = data;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+ id = args->in.ctx_id;
+ priority = args->in.priority;
+
+ /* For backwards compatibility, we need to accept ioctls with garbage
+ * in the priority field. Garbage values in the priority field result
+ * in the priority being set to NORMAL.
+ */
+ if (!amdgpu_ctx_priority_is_valid(priority))
+ priority = AMDGPU_CTX_PRIORITY_NORMAL;
+
+ switch (args->in.op) {
+ case AMDGPU_CTX_OP_ALLOC_CTX:
+ if (args->in.flags)
+ return -EINVAL;
+ r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
+ args->out.alloc.ctx_id = id;
+ break;
+ case AMDGPU_CTX_OP_FREE_CTX:
+ if (args->in.flags)
+ return -EINVAL;
+ r = amdgpu_ctx_free(fpriv, id);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE:
+ if (args->in.flags)
+ return -EINVAL;
+ r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE2:
+ if (args->in.flags)
+ return -EINVAL;
+ r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
+ break;
+ case AMDGPU_CTX_OP_GET_STABLE_PSTATE:
+ if (args->in.flags)
+ return -EINVAL;
+ r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate);
+ if (!r)
+ args->out.pstate.flags = stable_pstate;
+ break;
+ case AMDGPU_CTX_OP_SET_STABLE_PSTATE:
+ if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK)
+ return -EINVAL;
+ stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK;
+ if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK)
+ return -EINVAL;
+ r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
+{
+ struct amdgpu_ctx *ctx;
+ struct amdgpu_ctx_mgr *mgr;
+
+ if (!fpriv)
+ return NULL;
+
+ mgr = &fpriv->ctx_mgr;
+
+ mutex_lock(&mgr->lock);
+ ctx = idr_find(&mgr->ctx_handles, id);
+ if (ctx)
+ kref_get(&ctx->refcount);
+ mutex_unlock(&mgr->lock);
+ return ctx;
+}
+
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
+{
+ if (ctx == NULL)
+ return -EINVAL;
+
+ kref_put(&ctx->refcount, amdgpu_ctx_do_release);
+ return 0;
+}
+
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence)
+{
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ uint64_t seq = centity->sequence;
+ struct dma_fence *other = NULL;
+ unsigned idx = 0;
+
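+ /* fences[] is used as a ring buffer of amdgpu_sched_jobs slots (the
+ * module parameter is rounded up to a power of two at init), indexed
+ * by the submission sequence number.
+ */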
+ idx = seq & (amdgpu_sched_jobs - 1);
+ other = centity->fences[idx];
+ WARN_ON(other && !dma_fence_is_signaled(other));
+
+ dma_fence_get(fence);
+
+ spin_lock(&ctx->ring_lock);
+ centity->fences[idx] = fence;
+ centity->sequence++;
+ spin_unlock(&ctx->ring_lock);
+
+ atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
+ &ctx->mgr->time_spend[centity->hw_ip]);
+
+ dma_fence_put(other);
+ return seq;
+}
+
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ uint64_t seq)
+{
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ struct dma_fence *fence;
+
+ spin_lock(&ctx->ring_lock);
+
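+ /* a seq of ~0ull selects the most recently emitted fence */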
+ if (seq == ~0ull)
+ seq = centity->sequence - 1;
+
+ if (seq >= centity->sequence) {
+ spin_unlock(&ctx->ring_lock);
+ return ERR_PTR(-EINVAL);
+ }
+
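+ /* the slot has been reused, so the requested fence has long since signaled */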
+ if (seq + amdgpu_sched_jobs < centity->sequence) {
+ spin_unlock(&ctx->ring_lock);
+ return NULL;
+ }
+
+ fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
+ spin_unlock(&ctx->ring_lock);
+
+ return fence;
+}
+
+static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
+ struct amdgpu_ctx_entity *aentity,
+ int hw_ip,
+ int32_t priority)
+{
+ struct amdgpu_device *adev = ctx->mgr->adev;
+ unsigned int hw_prio;
+ struct drm_gpu_scheduler **scheds = NULL;
+ unsigned num_scheds;
+
+ /* set sw priority */
+ drm_sched_entity_set_priority(&aentity->entity,
+ amdgpu_ctx_to_drm_sched_prio(priority));
+
+ /* set hw priority */
+ if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
+ hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
+ hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
+ scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
+ num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ drm_sched_entity_modify_sched(&aentity->entity, scheds,
+ num_scheds);
+ }
+}
+
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+ int32_t priority)
+{
+ int32_t ctx_prio;
+ unsigned i, j;
+
+ ctx->override_priority = priority;
+
+ ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
+ ctx->init_priority : ctx->override_priority;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ if (!ctx->entities[i][j])
+ continue;
+
+ amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
+ i, ctx_prio);
+ }
+ }
+}
+
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity)
+{
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ struct dma_fence *other;
+ unsigned idx;
+ long r;
+
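+ /* the fence in the slot that the next submission will reuse was emitted
+ * amdgpu_sched_jobs submissions ago; waiting on it limits how far this
+ * entity can run ahead
+ */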
+ spin_lock(&ctx->ring_lock);
+ idx = centity->sequence & (amdgpu_sched_jobs - 1);
+ other = dma_fence_get(centity->fences[idx]);
+ spin_unlock(&ctx->ring_lock);
+
+ if (!other)
+ return 0;
+
+ r = dma_fence_wait(other, true);
+ if (r < 0 && r != -ERESTARTSYS)
+ DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+
+ dma_fence_put(other);
+ return r;
+}
+
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+ struct amdgpu_device *adev)
+{
+ unsigned int i;
+
+ mgr->adev = adev;
+ mutex_init(&mgr->lock);
+ idr_init_base(&mgr->ctx_handles, 1);
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ atomic64_set(&mgr->time_spend[i], 0);
+}
+
+long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id, i, j;
+
+ idp = &mgr->ctx_handles;
+
+ mutex_lock(&mgr->lock);
+ idr_for_each_entry(idp, ctx, id) {
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ timeout = drm_sched_entity_flush(entity, timeout);
+ }
+ }
+ }
+ mutex_unlock(&mgr->lock);
+ return timeout;
+}
+
+static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+{
+ struct amdgpu_ctx *ctx;
+ struct idr *idp;
+ uint32_t id, i, j;
+
+ idp = &mgr->ctx_handles;
+
+ idr_for_each_entry(idp, ctx, id) {
+ if (kref_read(&ctx->refcount) != 1) {
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ continue;
+ }
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+ struct drm_sched_entity *entity;
+
+ if (!ctx->entities[i][j])
+ continue;
+
+ entity = &ctx->entities[i][j]->entity;
+ drm_sched_entity_fini(entity);
+ }
+ }
+ kref_put(&ctx->refcount, amdgpu_ctx_fini);
+ }
+}
+
+void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
+{
+ amdgpu_ctx_mgr_entity_fini(mgr);
+ idr_destroy(&mgr->ctx_handles);
+ mutex_destroy(&mgr->lock);
+}
+
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+ ktime_t usage[AMDGPU_HW_IP_NUM])
+{
+ struct amdgpu_ctx *ctx;
+ unsigned int hw_ip, i;
+ uint32_t id;
+
+ /*
+ * This is a little bit racy because a ctx or a fence can be destroyed
+ * just in the moment we try to account them. But that is ok since
+ * exactly that case is explicitly allowed by the interface.
+ */
+ mutex_lock(&mgr->lock);
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);
+
+ usage[hw_ip] = ns_to_ktime(ns);
+ }
+
+ idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
+ struct amdgpu_ctx_entity *centity;
+ ktime_t spend;
+
+ centity = ctx->entities[hw_ip][i];
+ if (!centity)
+ continue;
+ spend = amdgpu_ctx_entity_time(ctx, centity);
+ usage[hw_ip] = ktime_add(usage[hw_ip], spend);
+ }
+ }
+ }
+ mutex_unlock(&mgr->lock);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
new file mode 100644
index 000000000000..090dfe86f75b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CTX_H__
+#define __AMDGPU_CTX_H__
+
+#include <linux/ktime.h>
+#include <linux/types.h>
+
+#include "amdgpu_ring.h"
+
+struct drm_device;
+struct drm_file;
+struct amdgpu_fpriv;
+struct amdgpu_ctx_mgr;
+
+#define AMDGPU_MAX_ENTITY_NUM 4
+
+struct amdgpu_ctx_entity {
+ uint32_t hw_ip;
+ uint64_t sequence;
+ struct drm_sched_entity entity;
+ struct dma_fence *fences[];
+};
+
+struct amdgpu_ctx {
+ struct kref refcount;
+ struct amdgpu_ctx_mgr *mgr;
+ unsigned reset_counter;
+ unsigned reset_counter_query;
+ uint64_t generation;
+ spinlock_t ring_lock;
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
+ bool preamble_presented;
+ int32_t init_priority;
+ int32_t override_priority;
+ atomic_t guilty;
+ unsigned long ras_counter_ce;
+ unsigned long ras_counter_ue;
+ uint32_t stable_pstate;
+ struct amdgpu_ctx_mgr *ctx_mgr;
+};
+
+struct amdgpu_ctx_mgr {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ /* protected by lock */
+ struct idr ctx_handles;
+ atomic64_t time_spend[AMDGPU_HW_IP_NUM];
+};
+
+extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
+
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+ u32 ring, struct drm_sched_entity **entity);
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence);
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ uint64_t seq);
+bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, int32_t ctx_prio);
+
+int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity);
+
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
+ struct amdgpu_device *adev);
+long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
+void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
+ ktime_t usage[AMDGPU_HW_IP_NUM]);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
new file mode 100644
index 000000000000..62d43b8cbe58
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -0,0 +1,2175 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kthread.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_dm_debugfs.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_rap.h"
+#include "amdgpu_securedisplay.h"
+#include "amdgpu_fw_attestation.h"
+#include "amdgpu_umr.h"
+
+#include "amdgpu_reset.h"
+#include "amdgpu_psp_ta.h"
+
+#if defined(CONFIG_DEBUG_FS)
+
+/**
+ * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
+ *
+ * @read: True if reading
+ * @f: open file handle
+ * @buf: User buffer to write/read to
+ * @size: Number of bytes to write/read
+ * @pos: Offset to seek to
+ *
+ * This debugfs entry attaches special meaning to the offset being sought.
+ * Various bits have different meanings:
+ *
+ * Bit 62: Indicates a GRBM bank switch is needed
+ * Bit 61: Indicates an SRBM bank switch is needed (implies bit 62 is
+ * zero)
+ * Bits 24..33: The SE or ME selector if needed
+ * Bits 34..43: The SH (or SA) or PIPE selector if needed
+ * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
+ *
+ * Bit 23: Indicates that the PM power gating lock should be held
+ * This is necessary to read registers that might be
+ * unreliable during a power gating transition.
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
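+ *
+ * For example, to read the DWORD at register byte offset 0x1000 on SE 1,
+ * SH 0, instance 2 (i.e. with a GRBM bank switch), seek to
+ * (1ULL << 62) | (1ULL << 24) | (2ULL << 44) | 0x1000 and read 4 bytes.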
+ */
+static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
+ char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+ bool pm_pg_lock, use_bank, use_ring;
+ unsigned int instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;
+
+ pm_pg_lock = use_bank = use_ring = false;
+ instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;
+
+ if (size & 0x3 || *pos & 0x3 ||
+ ((*pos & (1ULL << 62)) && (*pos & (1ULL << 61))))
+ return -EINVAL;
+
+ /* are we reading registers for which a PG lock is necessary? */
+ pm_pg_lock = (*pos >> 23) & 1;
+
+ if (*pos & (1ULL << 62)) {
+ se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
+ sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
+ instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
+
+ if (se_bank == 0x3FF)
+ se_bank = 0xFFFFFFFF;
+ if (sh_bank == 0x3FF)
+ sh_bank = 0xFFFFFFFF;
+ if (instance_bank == 0x3FF)
+ instance_bank = 0xFFFFFFFF;
+ use_bank = true;
+ } else if (*pos & (1ULL << 61)) {
+
+ me = (*pos & GENMASK_ULL(33, 24)) >> 24;
+ pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
+ queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
+ vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;
+
+ use_ring = true;
+ } else {
+ use_bank = use_ring = false;
+ }
+
+ *pos &= (1UL << 22) - 1;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ if (use_bank) {
+ if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
+ (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -EINVAL;
+ }
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, se_bank,
+ sh_bank, instance_bank, 0);
+ } else if (use_ring) {
+ mutex_lock(&adev->srbm_mutex);
+ amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0);
+ }
+
+ if (pm_pg_lock)
+ mutex_lock(&adev->pm.mutex);
+
+ while (size) {
+ uint32_t value;
+
+ if (read) {
+ value = RREG32(*pos >> 2);
+ r = put_user(value, (uint32_t *)buf);
+ } else {
+ r = get_user(value, (uint32_t *)buf);
+ if (!r)
+ amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0);
+ }
+ if (r) {
+ result = r;
+ goto end;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+end:
+ if (use_bank) {
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ } else if (use_ring) {
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ if (pm_pg_lock)
+ mutex_unlock(&adev->pm.mutex);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+}
+
+/*
+ * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
+ */
+static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
+}
+
+/*
+ * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
+ */
+static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
+}
+
+static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_regs2_data *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+ rd->adev = file_inode(file)->i_private;
+ file->private_data = rd;
+ mutex_init(&rd->lock);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_regs2_data *rd = file->private_data;
+
+ mutex_destroy(&rd->lock);
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 offset, size_t size, int write_en)
+{
+ struct amdgpu_debugfs_regs2_data *rd = f->private_data;
+ struct amdgpu_device *adev = rd->adev;
+ ssize_t result = 0;
+ int r;
+ uint32_t value;
+
+ if (size & 0x3 || offset & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ mutex_lock(&rd->lock);
+
+ if (rd->id.use_grbm) {
+ if ((rd->id.grbm.sh != 0xFFFFFFFF && rd->id.grbm.sh >= adev->gfx.config.max_sh_per_se) ||
+ (rd->id.grbm.se != 0xFFFFFFFF && rd->id.grbm.se >= adev->gfx.config.max_shader_engines)) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ mutex_unlock(&rd->lock);
+ return -EINVAL;
+ }
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se,
+ rd->id.grbm.sh,
+ rd->id.grbm.instance, rd->id.xcc_id);
+ }
+
+ if (rd->id.use_srbm) {
+ mutex_lock(&adev->srbm_mutex);
+ amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe,
+ rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id);
+ }
+
+ if (rd->id.pg_lock)
+ mutex_lock(&adev->pm.mutex);
+
+ while (size) {
+ if (!write_en) {
+ value = RREG32(offset >> 2);
+ r = put_user(value, (uint32_t *)buf);
+ } else {
+ r = get_user(value, (uint32_t *)buf);
+ if (!r)
+ amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value, rd->id.xcc_id);
+ }
+ if (r) {
+ result = r;
+ goto end;
+ }
+ offset += 4;
+ size -= 4;
+ result += 4;
+ buf += 4;
+ }
+end:
+ if (rd->id.use_grbm) {
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+
+ if (rd->id.use_srbm) {
+ amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+
+ if (rd->id.pg_lock)
+ mutex_unlock(&adev->pm.mutex);
+
+ mutex_unlock(&rd->lock);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+}
+
+static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data)
+{
+ struct amdgpu_debugfs_regs2_data *rd = f->private_data;
+ struct amdgpu_debugfs_regs2_iocdata v1_data;
+ int r;
+
+ mutex_lock(&rd->lock);
+
+ switch (cmd) {
+ case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2:
+ r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data,
+ sizeof(rd->id));
+ if (r)
+ r = -EINVAL;
+ goto done;
+ case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE:
+ r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data,
+ sizeof(v1_data));
+ if (r) {
+ r = -EINVAL;
+ goto done;
+ }
+ goto v1_copy;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+v1_copy:
+ rd->id.use_srbm = v1_data.use_srbm;
+ rd->id.use_grbm = v1_data.use_grbm;
+ rd->id.pg_lock = v1_data.pg_lock;
+ rd->id.grbm.se = v1_data.grbm.se;
+ rd->id.grbm.sh = v1_data.grbm.sh;
+ rd->id.grbm.instance = v1_data.grbm.instance;
+ rd->id.srbm.me = v1_data.srbm.me;
+ rd->id.srbm.pipe = v1_data.srbm.pipe;
+ rd->id.srbm.queue = v1_data.srbm.queue;
+ rd->id.xcc_id = 0;
+done:
+ mutex_unlock(&rd->lock);
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_regs2_op(f, buf, *pos, size, 0);
+}
+
+static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf, size_t size, loff_t *pos)
+{
+ return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1);
+}
+
+static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+ rd->adev = file_inode(file)->i_private;
+ file->private_data = rd;
+ mutex_init(&rd->lock);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = file->private_data;
+
+ mutex_destroy(&rd->lock);
+ kfree(file->private_data);
+ return 0;
+}
+
+static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ struct amdgpu_device *adev = rd->adev;
+ ssize_t result = 0;
+ int r;
+ uint32_t *data, x;
+
+ if (size > 4096 || size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -ENOMEM;
+ }
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id);
+
+ if (!rd->id.gpr_or_wave) {
+ x = 0;
+ if (adev->gfx.funcs->read_wave_data)
+ adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x);
+ } else {
+ x = size >> 2;
+ if (rd->id.gpr.vpgr_or_sgpr) {
+ if (adev->gfx.funcs->read_wave_vgprs)
+ adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data);
+ } else {
+ if (adev->gfx.funcs->read_wave_sgprs)
+ adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data);
+ }
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (!x) {
+ result = -EINVAL;
+ goto done;
+ }
+
+ while (size && (*pos < x * 4)) {
+ uint32_t value;
+
+ value = data[*pos >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto done;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+done:
+ amdgpu_virt_disable_access_debugfs(adev);
+ kfree(data);
+ return result;
+}
+
+static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data)
+{
+ struct amdgpu_debugfs_gprwave_data *rd = f->private_data;
+ int r = 0;
+
+ mutex_lock(&rd->lock);
+
+ switch (cmd) {
+ case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE:
+ if (copy_from_user(&rd->id,
+ (struct amdgpu_debugfs_gprwave_iocdata *)data,
+ sizeof(rd->id)))
+ r = -EFAULT;
+ goto done;
+ default:
+ r = -EINVAL;
+ goto done;
+ }
+
+done:
+ mutex_unlock(&rd->lock);
+ return r;
+}
+
+
+/**
+ * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ if (upper_32_bits(*pos))
+ value = RREG32_PCIE_EXT(*pos);
+ else
+ value = RREG32_PCIE(*pos);
+
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ if (upper_32_bits(*pos))
+ WREG32_PCIE_EXT(*pos, value);
+ else
+ WREG32_PCIE(*pos, value);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (!adev->didt_rreg)
+ return -EOPNOTSUPP;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ value = RREG32_DIDT(*pos >> 2);
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (!adev->didt_wreg)
+ return -EOPNOTSUPP;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ WREG32_DIDT(*pos >> 2, value);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_smc_read - Read from a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read. This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (!adev->smc_rreg)
+ return -EOPNOTSUPP;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ value = RREG32_SMC(*pos);
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_regs_smc_write - Write to a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write. This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
+static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (!adev->smc_wreg)
+ return -EOPNOTSUPP;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ WREG32_SMC(*pos, value);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gca_config_read - Read from gfx config data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * This file is used to access configuration data in a somewhat
+ * stable fashion. The format is a series of DWORDs with the first
+ * indicating which revision it is. New content is appended to the
+ * end so that older software can still read the data.
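+ *
+ * For example, config[0] holds the format version (currently 5) and
+ * config[1] the number of shader engines; readers built against an older
+ * version simply ignore any DWORDs appended after the ones they know.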
+ */
+
+static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+ uint32_t *config, no_regs = 0;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ config = kmalloc_array(256, sizeof(*config), GFP_KERNEL);
+ if (!config)
+ return -ENOMEM;
+
+ /* version, increment each time something is added */
+ config[no_regs++] = 5;
+ config[no_regs++] = adev->gfx.config.max_shader_engines;
+ config[no_regs++] = adev->gfx.config.max_tile_pipes;
+ config[no_regs++] = adev->gfx.config.max_cu_per_sh;
+ config[no_regs++] = adev->gfx.config.max_sh_per_se;
+ config[no_regs++] = adev->gfx.config.max_backends_per_se;
+ config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
+ config[no_regs++] = adev->gfx.config.max_gprs;
+ config[no_regs++] = adev->gfx.config.max_gs_threads;
+ config[no_regs++] = adev->gfx.config.max_hw_contexts;
+ config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
+ config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
+ config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
+ config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
+ config[no_regs++] = adev->gfx.config.num_tile_pipes;
+ config[no_regs++] = adev->gfx.config.backend_enable_mask;
+ config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
+ config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
+ config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
+ config[no_regs++] = adev->gfx.config.num_gpus;
+ config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
+ config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
+ config[no_regs++] = adev->gfx.config.gb_addr_config;
+ config[no_regs++] = adev->gfx.config.num_rbs;
+
+ /* rev==1 */
+ config[no_regs++] = adev->rev_id;
+ config[no_regs++] = adev->pg_flags;
+ config[no_regs++] = lower_32_bits(adev->cg_flags);
+
+ /* rev==2 */
+ config[no_regs++] = adev->family;
+ config[no_regs++] = adev->external_rev_id;
+
+ /* rev==3 */
+ config[no_regs++] = adev->pdev->device;
+ config[no_regs++] = adev->pdev->revision;
+ config[no_regs++] = adev->pdev->subsystem_device;
+ config[no_regs++] = adev->pdev->subsystem_vendor;
+
+ /* rev==4 APU flag */
+ config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0;
+
+ /* rev==5 PG/CG flag upper 32bit */
+ config[no_regs++] = 0;
+ config[no_regs++] = upper_32_bits(adev->cg_flags);
+
+ while (size && (*pos < no_regs * 4)) {
+ uint32_t value;
+
+ value = config[*pos >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ kfree(config);
+ return r;
+ }
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ kfree(config);
+ return result;
+}
+
+/**
+ * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset is treated as the BYTE address of one of the sensors
+ * enumerated in amd/include/kgd_pp_interface.h under the
+ * 'amd_pp_sensors' enumeration. For instance, to read the UVD VCLK
+ * you would use the offset 3 * 4 = 12.
+ */
+static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ int idx, x, outsize, r, valuesize;
+ uint32_t values[16];
+
+ if (size & 3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (!adev->pm.dpm_enabled)
+ return -EINVAL;
+
+ /* convert offset to sensor number */
+ idx = *pos >> 2;
+
+ valuesize = sizeof(values);
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (r) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+ }
+
+ if (size > valuesize) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -EINVAL;
+ }
+
+ outsize = 0;
+ x = 0;
+ if (!r) {
+ while (size) {
+ r = put_user(values[x++], (int32_t *)buf);
+ buf += 4;
+ size -= 4;
+ outsize += 4;
+ }
+ }
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return !r ? outsize : r;
+}
+
+/**
+ * amdgpu_debugfs_wave_read - Read WAVE STATUS data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset being sought changes which wave the status data
+ * will be returned for. The bits are used as follows:
+ *
+ * Bits 0..6: Byte offset into data
+ * Bits 7..14: SE selector
+ * Bits 15..22: SH/SA selector
+ * Bits 23..30: CU/{WGP+SIMD} selector
+ * Bits 31..36: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ *
+ * The returned data begins with one DWORD of version information,
+ * followed by WAVE STATUS registers relevant to the GFX IP version
+ * being used. See gfx_v8_0_read_wave_data() for an example output.
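+ *
+ * For example, to dump the status of wave 0 on SIMD 1 of CU 2 (SE 0,
+ * SH 0), seek to (1ULL << 37) | (2ULL << 23) and read from the start of
+ * the returned data.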
+ */
+static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = f->f_inode->i_private;
+ int r, x;
+ ssize_t result = 0;
+ uint32_t offset, se, sh, cu, wave, simd, data[32];
+
+ if (size & 3 || *pos & 3)
+ return -EINVAL;
+
+ /* decode offset */
+ offset = (*pos & GENMASK_ULL(6, 0));
+ se = (*pos & GENMASK_ULL(14, 7)) >> 7;
+ sh = (*pos & GENMASK_ULL(22, 15)) >> 15;
+ cu = (*pos & GENMASK_ULL(30, 23)) >> 23;
+ wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
+ simd = (*pos & GENMASK_ULL(44, 37)) >> 37;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
+
+ x = 0;
+ if (adev->gfx.funcs->read_wave_data)
+ adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x);
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ if (!x) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ return -EINVAL;
+ }
+
+ while (size && (offset < x * 4)) {
+ uint32_t value;
+
+ value = data[offset >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ return r;
+ }
+
+ result += 4;
+ buf += 4;
+ offset += 4;
+ size -= 4;
+ }
+
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+}
+
+/**
+ * amdgpu_debugfs_gpr_read - Read wave gprs
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * The offset being sought changes which wave the status data
+ * will be returned for. The bits are used as follows:
+ *
+ * Bits 0..11: Byte offset into data
+ * Bits 12..19: SE selector
+ * Bits 20..27: SH/SA selector
+ * Bits 28..35: CU/{WGP+SIMD} selector
+ * Bits 36..43: WAVE ID selector
+ * Bits 44..51: SIMD ID selector
+ * Bits 52..59: Thread selector
+ * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
+ *
+ * The return data comes from the SGPR or VGPR register bank for
+ * the selected operational unit.
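+ *
+ * For example, to read the SGPRs of wave 1 on SIMD 0 of CU 0 (SE 0,
+ * SH 0), seek to (1ULL << 60) | (1ULL << 36) and read from the start of
+ * the bank.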
+ */
+static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = f->f_inode->i_private;
+ int r;
+ ssize_t result = 0;
+ uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
+
+ if (size > 4096 || size & 3 || *pos & 3)
+ return -EINVAL;
+
+ /* decode offset */
+ offset = (*pos & GENMASK_ULL(11, 0)) >> 2;
+ se = (*pos & GENMASK_ULL(19, 12)) >> 12;
+ sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
+ cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
+ wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
+ simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
+ thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
+ bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
+
+ data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0)
+ goto err;
+
+ r = amdgpu_virt_enable_access_debugfs(adev);
+ if (r < 0)
+ goto err;
+
+ /* switch to the specific se/sh/cu */
+ mutex_lock(&adev->grbm_idx_mutex);
+ amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
+
+ if (bank == 0) {
+ if (adev->gfx.funcs->read_wave_vgprs)
+ adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data);
+ } else {
+ if (adev->gfx.funcs->read_wave_sgprs)
+ adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data);
+ }
+
+ amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ while (size) {
+ uint32_t value;
+
+ value = data[result >> 2];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ amdgpu_virt_disable_access_debugfs(adev);
+ goto err;
+ }
+
+ result += 4;
+ buf += 4;
+ size -= 4;
+ }
+
+ kfree(data);
+ amdgpu_virt_disable_access_debugfs(adev);
+ return result;
+
+err:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ kfree(data);
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ *
+ * Read the last residency value logged. It doesn't auto-update; one needs to
+ * stop logging before reading the current value.
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = amdgpu_get_gfx_off_residency(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ amdgpu_set_gfx_off_residency(adev, value ? true : false);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+
+/**
+ * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u64 value = 0;
+
+ r = amdgpu_get_gfx_off_entrycount(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (u64 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos: Offset to seek to
+ *
+ * Write a 32-bit zero to disable or a 32-bit non-zero to enable
+ */
+static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ uint32_t value;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ goto out;
+
+ amdgpu_gfx_off_ctrl(adev, value ? true : false);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+
+/**
+ * amdgpu_debugfs_gfxoff_read - read gfxoff status
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos: Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value = adev->gfx.gfx_off_state;
+
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
+
+ r = amdgpu_get_gfx_off_status(adev, &value);
+ if (r)
+ goto out;
+
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static const struct file_operations amdgpu_debugfs_regs2_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = amdgpu_debugfs_regs2_ioctl,
+ .read = amdgpu_debugfs_regs2_read,
+ .write = amdgpu_debugfs_regs2_write,
+ .open = amdgpu_debugfs_regs2_open,
+ .release = amdgpu_debugfs_regs2_release,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gprwave_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl,
+ .read = amdgpu_debugfs_gprwave_read,
+ .open = amdgpu_debugfs_gprwave_open,
+ .release = amdgpu_debugfs_gprwave_release,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_regs_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_read,
+ .write = amdgpu_debugfs_regs_write,
+ .llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_didt_read,
+ .write = amdgpu_debugfs_regs_didt_write,
+ .llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_pcie_read,
+ .write = amdgpu_debugfs_regs_pcie_write,
+ .llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_regs_smc_read,
+ .write = amdgpu_debugfs_regs_smc_write,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gca_config_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gca_config_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_sensors_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_sensor_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_wave_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_wave_read,
+ .llseek = default_llseek
+};
+static const struct file_operations amdgpu_debugfs_gpr_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gpr_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_read,
+ .write = amdgpu_debugfs_gfxoff_write,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_status_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_count_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_residency_read,
+ .write = amdgpu_debugfs_gfxoff_residency_write,
+ .llseek = default_llseek
+};
+
+static const struct file_operations *debugfs_regs[] = {
+ &amdgpu_debugfs_regs_fops,
+ &amdgpu_debugfs_regs2_fops,
+ &amdgpu_debugfs_gprwave_fops,
+ &amdgpu_debugfs_regs_didt_fops,
+ &amdgpu_debugfs_regs_pcie_fops,
+ &amdgpu_debugfs_regs_smc_fops,
+ &amdgpu_debugfs_gca_config_fops,
+ &amdgpu_debugfs_sensors_fops,
+ &amdgpu_debugfs_wave_fops,
+ &amdgpu_debugfs_gpr_fops,
+ &amdgpu_debugfs_gfxoff_fops,
+ &amdgpu_debugfs_gfxoff_status_fops,
+ &amdgpu_debugfs_gfxoff_count_fops,
+ &amdgpu_debugfs_gfxoff_residency_fops,
+};
+
+static const char * const debugfs_regs_names[] = {
+ "amdgpu_regs",
+ "amdgpu_regs2",
+ "amdgpu_gprwave",
+ "amdgpu_regs_didt",
+ "amdgpu_regs_pcie",
+ "amdgpu_regs_smc",
+ "amdgpu_gca_config",
+ "amdgpu_sensors",
+ "amdgpu_wave",
+ "amdgpu_gpr",
+ "amdgpu_gfxoff",
+ "amdgpu_gfxoff_status",
+ "amdgpu_gfxoff_count",
+ "amdgpu_gfxoff_residency",
+};
+
+/**
+ * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
+ * register access.
+ *
+ * @adev: The device to attach the debugfs entries to
+ */
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+{
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *ent, *root = minor->debugfs_root;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
+ ent = debugfs_create_file(debugfs_regs_names[i],
+ S_IFREG | 0400, root,
+ adev, debugfs_regs[i]);
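+ /* Make the first entry (amdgpu_regs) report the MMIO aperture size
+ * so userspace knows how much it can read.
+ */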
+ if (!i && !IS_ERR_OR_NULL(ent))
+ i_size_write(ent->d_inode, adev->rmmio_size);
+ }
+
+ return 0;
+}
+
+static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r = 0, i;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ /* Avoid accidentally unparking the sched thread during GPU reset */
+ r = down_write_killable(&adev->reset_domain->sem);
+ if (r) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ /* hold on the scheduler */
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+ drm_sched_wqueue_stop(&ring->sched);
+ }
+
+ seq_puts(m, "run ib test:\n");
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ seq_printf(m, "ib ring tests failed (%d).\n", r);
+ else
+ seq_puts(m, "ib ring tests passed.\n");
+
+ /* go on the scheduler */
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+ drm_sched_wqueue_start(&ring->sched);
+ }
+
+ up_write(&adev->reset_domain->sem);
+
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_evict_vram(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ *val = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ *val = amdgpu_ttm_evict_resources(adev, TTM_PL_TT);
+
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
+static int amdgpu_debugfs_benchmark(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return r;
+ }
+
+ r = amdgpu_benchmark(adev, val);
+
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return r;
+}
+
+static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_file *file;
+ int r;
+
+ r = mutex_lock_interruptible(&dev->filelist_mutex);
+ if (r)
+ return r;
+
+ list_for_each_entry(file, &dev->filelist, lhead) {
+ struct amdgpu_fpriv *fpriv = file->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_task_info *ti;
+
+ ti = amdgpu_vm_get_task_info_vm(vm);
+ if (ti) {
+ seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->task.pid, ti->process_name);
+ amdgpu_vm_put_task_info(ti);
+ }
+
+ r = amdgpu_bo_reserve(vm->root.bo, true);
+ if (r)
+ break;
+ amdgpu_debugfs_vm_bo_info(vm, m);
+ amdgpu_bo_unreserve(vm->root.bo);
+ }
+
+ mutex_unlock(&dev->filelist_mutex);
+
+ return r;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_test_ib);
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_vm_info);
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_vram_fops, amdgpu_debugfs_evict_vram,
+ NULL, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_gtt_fops, amdgpu_debugfs_evict_gtt,
+ NULL, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_benchmark_fops, NULL, amdgpu_debugfs_benchmark,
+ "%lld\n");
+
+static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
+ struct dma_fence **fences)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ uint32_t sync_seq, last_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = ring->fence_drv.sync_seq;
+
+ last_seq &= drv->num_fences_mask;
+ sync_seq &= drv->num_fences_mask;
+
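+ /*
+ * Walk the fence ring from the last signalled sequence number up to
+ * the last emitted one, detach the still-pending fences from the
+ * fence driver and hand them to the caller so they can be signalled
+ * once the preempted jobs have been dealt with.
+ */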
+ do {
+ struct dma_fence *fence, **ptr;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ ptr = &drv->fences[last_seq];
+
+ fence = rcu_dereference_protected(*ptr, 1);
+ RCU_INIT_POINTER(*ptr, NULL);
+
+ if (!fence)
+ continue;
+
+ fences[last_seq] = fence;
+
+ } while (last_seq != sync_seq);
+}
+
+static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
+ int length)
+{
+ int i;
+ struct dma_fence *fence;
+
+ for (i = 0; i < length; i++) {
+ fence = fences[i];
+ if (!fence)
+ continue;
+ dma_fence_signal(fence);
+ dma_fence_put(fence);
+ }
+}
+
+static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
+{
+ struct drm_sched_job *s_job;
+ struct dma_fence *fence;
+
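+ /*
+ * Re-submit every job still on the pending list; their previous
+ * hardware fences were force-completed, so run_job() issues the work
+ * to the ring again.
+ */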
+ spin_lock(&sched->job_list_lock);
+ list_for_each_entry(s_job, &sched->pending_list, list) {
+ fence = sched->ops->run_job(s_job);
+ dma_fence_put(fence);
+ }
+ spin_unlock(&sched->job_list_lock);
+}
+
+static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
+{
+ struct amdgpu_job *job;
+ struct drm_sched_job *s_job, *tmp;
+ uint32_t preempt_seq;
+ struct dma_fence *fence, **ptr;
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ bool preempted = true;
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+ return;
+
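+ /*
+ * The dword two slots past the ring's fence value records the sequence
+ * number reached when the ring was preempted; if it is not ahead of the
+ * last signalled fence, nothing was actually interrupted.
+ */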
+ preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
+ if (preempt_seq <= atomic_read(&drv->last_seq)) {
+ preempted = false;
+ goto no_preempt;
+ }
+
+ preempt_seq &= drv->num_fences_mask;
+ ptr = &drv->fences[preempt_seq];
+ fence = rcu_dereference_protected(*ptr, 1);
+
+no_preempt:
+ spin_lock(&sched->job_list_lock);
+ list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
+ if (dma_fence_is_signaled(&s_job->s_fence->finished)) {
+ /* remove job from ring_mirror_list */
+ list_del_init(&s_job->list);
+ sched->ops->free_job(s_job);
+ continue;
+ }
+ job = to_amdgpu_job(s_job);
+ if (preempted && (&job->hw_fence->base) == fence)
+ /* mark the job as preempted */
+ job->preemption_status |= AMDGPU_IB_PREEMPTED;
+ }
+ spin_unlock(&sched->job_list_lock);
+}
+
+static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
+{
+ int r, length;
+ struct amdgpu_ring *ring;
+ struct dma_fence **fences = NULL;
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+
+ if (val >= AMDGPU_MAX_RINGS)
+ return -EINVAL;
+
+ ring = adev->rings[val];
+
+ if (!amdgpu_ring_sched_ready(ring) ||
+ !ring->funcs->preempt_ib)
+ return -EINVAL;
+
+ /* the last preemption failed */
+ if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr))
+ return -EBUSY;
+
+ length = ring->fence_drv.num_fences_mask + 1;
+ fences = kcalloc(length, sizeof(void *), GFP_KERNEL);
+ if (!fences)
+ return -ENOMEM;
+
+ /* Avoid accidentally unparking the sched thread during GPU reset */
+ r = down_read_killable(&adev->reset_domain->sem);
+ if (r)
+ goto pro_end;
+
+ /* stop the scheduler */
+ drm_sched_wqueue_stop(&ring->sched);
+
+ /* preempt the IB */
+ r = amdgpu_ring_preempt_ib(ring);
+ if (r) {
+ DRM_WARN("failed to preempt ring %d\n", ring->idx);
+ goto failure;
+ }
+
+ amdgpu_fence_process(ring);
+
+ if (atomic_read(&ring->fence_drv.last_seq) !=
+ ring->fence_drv.sync_seq) {
+ DRM_INFO("ring %d was preempted\n", ring->idx);
+
+ amdgpu_ib_preempt_mark_partial_job(ring);
+
+ /* swap out the old fences */
+ amdgpu_ib_preempt_fences_swap(ring, fences);
+
+ amdgpu_fence_driver_force_completion(ring);
+
+ /* resubmit unfinished jobs */
+ amdgpu_ib_preempt_job_recovery(&ring->sched);
+
+ /* wait for the resubmitted jobs to finish */
+ amdgpu_fence_wait_empty(ring);
+
+ /* signal the old fences */
+ amdgpu_ib_preempt_signal_fences(fences, length);
+ }
+
+failure:
+ /* restart the scheduler */
+ drm_sched_wqueue_start(&ring->sched);
+
+ up_read(&adev->reset_domain->sem);
+
+pro_end:
+ kfree(fences);
+
+ return r;
+}
+
+static int amdgpu_debugfs_sclk_set(void *data, u64 val)
+{
+ int ret = 0;
+ uint32_t max_freq, min_freq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+
+ if (amdgpu_sriov_multi_vf_mode(adev))
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return ret;
+ }
+
+ ret = amdgpu_dpm_get_dpm_freq_range(adev, PP_SCLK, &min_freq, &max_freq);
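+ /* No SCLK DPM range exposed: treat the write as a no-op rather than an error */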
+ if (ret == -EOPNOTSUPP) {
+ ret = 0;
+ goto out;
+ }
+ if (ret || val > max_freq || val < min_freq) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = amdgpu_dpm_set_soft_freq_range(adev, PP_SCLK, (uint32_t)val, (uint32_t)val);
+ if (ret)
+ ret = -EINVAL;
+
+out:
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
+ amdgpu_debugfs_ib_preempt, "%llu\n");
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
+ amdgpu_debugfs_sclk_set, "%llu\n");
+
+int amdgpu_debugfs_init(struct amdgpu_device *adev)
+{
+ struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
+ struct dentry *ent;
+ int r, i;
+
+ if (!debugfs_initialized())
+ return 0;
+
+ debugfs_create_x32("amdgpu_smu_debug", 0600, root,
+ &adev->pm.smu_debug_mask);
+
+ ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev,
+ &fops_ib_preempt);
+ if (IS_ERR(ent)) {
+ DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
+ return PTR_ERR(ent);
+ }
+
+ ent = debugfs_create_file("amdgpu_force_sclk", 0200, root, adev,
+ &fops_sclk_set);
+ if (IS_ERR(ent)) {
+ DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n");
+ return PTR_ERR(ent);
+ }
+
+ /* Register debugfs entries for amdgpu_ttm */
+ amdgpu_ttm_debugfs_init(adev);
+ amdgpu_debugfs_pm_init(adev);
+ amdgpu_debugfs_sa_init(adev);
+ amdgpu_debugfs_fence_init(adev);
+ amdgpu_debugfs_gem_init(adev);
+
+ r = amdgpu_debugfs_regs_init(adev);
+ if (r)
+ DRM_ERROR("registering register debugfs failed (%d).\n", r);
+
+ amdgpu_debugfs_firmware_init(adev);
+ amdgpu_ta_if_debugfs_init(adev);
+
+ amdgpu_debugfs_mes_event_log_init(adev);
+
+#if defined(CONFIG_DRM_AMD_DC)
+ if (adev->dc_enabled)
+ dtn_debugfs_init(adev);
+#endif
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring)
+ continue;
+
+ amdgpu_debugfs_ring_init(adev, ring);
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (!amdgpu_vcnfw_log)
+ break;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]);
+ }
+
+ if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog)
+ amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm);
+
+ amdgpu_debugfs_vcn_sched_mask_init(adev);
+ amdgpu_debugfs_jpeg_sched_mask_init(adev);
+ amdgpu_debugfs_gfx_sched_mask_init(adev);
+ amdgpu_debugfs_compute_sched_mask_init(adev);
+ amdgpu_debugfs_sdma_sched_mask_init(adev);
+
+ amdgpu_ras_debugfs_create_all(adev);
+ amdgpu_rap_debugfs_init(adev);
+ amdgpu_securedisplay_debugfs_init(adev);
+ amdgpu_fw_attestation_debugfs_init(adev);
+ amdgpu_psp_debugfs_init(adev);
+
+ debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
+ &amdgpu_evict_vram_fops);
+ debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev,
+ &amdgpu_evict_gtt_fops);
+ debugfs_create_file("amdgpu_test_ib", 0400, root, adev,
+ &amdgpu_debugfs_test_ib_fops);
+ debugfs_create_file("amdgpu_vm_info", 0444, root, adev,
+ &amdgpu_debugfs_vm_info_fops);
+ debugfs_create_file("amdgpu_benchmark", 0200, root, adev,
+ &amdgpu_benchmark_fops);
+
+ adev->debugfs_vbios_blob.data = adev->bios;
+ adev->debugfs_vbios_blob.size = adev->bios_size;
+ debugfs_create_blob("amdgpu_vbios", 0444, root,
+ &adev->debugfs_vbios_blob);
+
+ if (adev->discovery.debugfs_blob.size)
+ debugfs_create_blob("amdgpu_discovery", 0444, root,
+ &adev->discovery.debugfs_blob);
+
+ return 0;
+}
+
+static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
+{
+ struct drm_file *file;
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_bo *root_bo;
+ struct amdgpu_device *adev;
+ int r;
+
+ file = m->private;
+ if (!file)
+ return -EINVAL;
+
+ adev = drm_to_adev(file->minor->dev);
+ fpriv = file->driver_priv;
+ if (!fpriv || !fpriv->vm.root.bo)
+ return -ENODEV;
+
+ root_bo = amdgpu_bo_ref(fpriv->vm.root.bo);
+ r = amdgpu_bo_reserve(root_bo, true);
+ if (r) {
+ amdgpu_bo_unref(&root_bo);
+ return -EINVAL;
+ }
+
+ seq_printf(m, "pd_address: 0x%llx\n", amdgpu_gmc_pd_addr(fpriv->vm.root.bo));
+ seq_printf(m, "max_pfn: 0x%llx\n", adev->vm_manager.max_pfn);
+ seq_printf(m, "num_level: 0x%x\n", adev->vm_manager.num_level);
+ seq_printf(m, "block_size: 0x%x\n", adev->vm_manager.block_size);
+ seq_printf(m, "fragment_size: 0x%x\n", adev->vm_manager.fragment_size);
+
+ amdgpu_bo_unreserve(root_bo);
+ amdgpu_bo_unref(&root_bo);
+
+ return 0;
+}
+
+static int amdgpu_pt_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_pt_info_read, inode->i_private);
+}
+
+static const struct file_operations amdgpu_pt_info_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_pt_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
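+/*
+ * Create a per-DRM-client debugfs entry exposing that client's VM page table
+ * layout.
+ */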
+void amdgpu_debugfs_vm_init(struct drm_file *file)
+{
+ debugfs_create_file("vm_pagetable_info", 0444, file->debugfs_client, file,
+ &amdgpu_pt_info_fops);
+}
+
+#else
+int amdgpu_debugfs_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+void amdgpu_debugfs_vm_init(struct drm_file *file)
+{
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
new file mode 100644
index 000000000000..e7b3c38e5186
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+/*
+ * Debugfs
+ */
+
+int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
+int amdgpu_debugfs_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_fini(struct amdgpu_device *adev);
+void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_vm_init(struct drm_file *file);
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
new file mode 100644
index 000000000000..4e2fe6674db8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/devcoredump.h>
+#include "amdgpu_dev_coredump.h"
+#include "atom.h"
+
+#ifndef CONFIG_DEV_COREDUMP
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job)
+{
+}
+#else
+
+const char *hw_ip_names[MAX_HWIP] = {
+ [GC_HWIP] = "GC",
+ [HDP_HWIP] = "HDP",
+ [SDMA0_HWIP] = "SDMA0",
+ [SDMA1_HWIP] = "SDMA1",
+ [SDMA2_HWIP] = "SDMA2",
+ [SDMA3_HWIP] = "SDMA3",
+ [SDMA4_HWIP] = "SDMA4",
+ [SDMA5_HWIP] = "SDMA5",
+ [SDMA6_HWIP] = "SDMA6",
+ [SDMA7_HWIP] = "SDMA7",
+ [LSDMA_HWIP] = "LSDMA",
+ [MMHUB_HWIP] = "MMHUB",
+ [ATHUB_HWIP] = "ATHUB",
+ [NBIO_HWIP] = "NBIO",
+ [MP0_HWIP] = "MP0",
+ [MP1_HWIP] = "MP1",
+ [UVD_HWIP] = "UVD/JPEG/VCN",
+ [VCN1_HWIP] = "VCN1",
+ [VCE_HWIP] = "VCE",
+ [VPE_HWIP] = "VPE",
+ [DF_HWIP] = "DF",
+ [DCE_HWIP] = "DCE",
+ [OSSSYS_HWIP] = "OSSSYS",
+ [SMUIO_HWIP] = "SMUIO",
+ [PWR_HWIP] = "PWR",
+ [NBIF_HWIP] = "NBIF",
+ [THM_HWIP] = "THM",
+ [CLK_HWIP] = "CLK",
+ [UMC_HWIP] = "UMC",
+ [RSMU_HWIP] = "RSMU",
+ [XGMI_HWIP] = "XGMI",
+ [DCI_HWIP] = "DCI",
+ [PCIE_HWIP] = "PCIE",
+};
+
+static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
+ struct drm_printer *p)
+{
+ uint32_t version;
+ uint32_t feature;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
+ adev->vce.fb_version, adev->vce.fw_version);
+ drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0,
+ adev->uvd.fw_version);
+ drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gmc.fw_version);
+ drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.me_feature_version, adev->gfx.me_fw_version);
+ drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
+ drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
+ drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
+
+ drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlc_feature_version,
+ adev->gfx.rlc_srlc_fw_version);
+ drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlg_feature_version,
+ adev->gfx.rlc_srlg_fw_version);
+ drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srls_feature_version,
+ adev->gfx.rlc_srls_fw_version);
+ drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcp_ucode_feature_version,
+ adev->gfx.rlcp_ucode_version);
+ drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcv_ucode_feature_version,
+ adev->gfx.rlcv_ucode_version);
+ drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec_feature_version, adev->gfx.mec_fw_version);
+
+ if (adev->gfx.mec2_fw)
+ drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec2_feature_version,
+ adev->gfx.mec2_fw_version);
+
+ drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gfx.imu_fw_version);
+ drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
+ adev->psp.sos.feature_version, adev->psp.sos.fw_version);
+ drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
+ adev->psp.asd_context.bin_desc.feature_version,
+ adev->psp.asd_context.bin_desc.fw_version);
+
+ drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.xgmi_context.context.bin_desc.feature_version,
+ adev->psp.xgmi_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.ras_context.context.bin_desc.feature_version,
+ adev->psp.ras_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.hdcp_context.context.bin_desc.feature_version,
+ adev->psp.hdcp_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.dtm_context.context.bin_desc.feature_version,
+ adev->psp.dtm_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.rap_context.context.bin_desc.feature_version,
+ adev->psp.rap_context.context.bin_desc.fw_version);
+ drm_printf(p,
+ "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.securedisplay_context.context.bin_desc.feature_version,
+ adev->psp.securedisplay_context.context.bin_desc.fw_version);
+
+ /* SMC firmware */
+ version = adev->pm.fw_version;
+
+ smu_program = (version >> 24) & 0xff;
+ smu_major = (version >> 16) & 0xff;
+ smu_minor = (version >> 8) & 0xff;
+ smu_debug = (version >> 0) & 0xff;
+ drm_printf(p,
+ "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
+ 0, smu_program, version, smu_major, smu_minor, smu_debug);
+
+ /* SDMA firmware */
+ for (int i = 0; i < adev->sdma.num_instances; i++) {
+ drm_printf(p,
+ "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+ i, adev->sdma.instance[i].feature_version,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0,
+ adev->vcn.fw_version);
+ drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcu_fw_version);
+ drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcub_fw_version);
+ drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
+ adev->psp.toc.feature_version, adev->psp.toc.fw_version);
+
+ version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
+ feature, version);
+
+ version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature,
+ version);
+
+ drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
+ adev->vpe.feature_version, adev->vpe.fw_version);
+
+ drm_printf(p, "\nVBIOS Information\n");
+ drm_printf(p, "vbios name : %s\n", ctx->name);
+ drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn);
+ drm_printf(p, "vbios version : %d\n", ctx->version);
+ drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str);
+ drm_printf(p, "vbios date : %s\n", ctx->date);
+}
+
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen)
+{
+ struct drm_printer p;
+ struct amdgpu_coredump_info *coredump = data;
+ struct drm_print_iterator iter;
+ struct amdgpu_vm_fault_info *fault_info;
+ struct amdgpu_ip_block *ip_block;
+ int ver;
+
+ iter.data = buffer;
+ iter.offset = 0;
+ iter.start = offset;
+ iter.remain = count;
+
+ p = drm_coredump_printer(&iter);
+
+ drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
+ drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
+ drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+ drm_printf(&p, "time: %ptSp\n", &coredump->reset_time);
+
+ if (coredump->reset_task_info.task.pid)
+ drm_printf(&p, "process_name: %s PID: %d\n",
+ coredump->reset_task_info.process_name,
+ coredump->reset_task_info.task.pid);
+
+ /* SOC Information */
+ drm_printf(&p, "\nSOC Information\n");
+ drm_printf(&p, "SOC Device id: %d\n", coredump->adev->pdev->device);
+ drm_printf(&p, "SOC PCI Revision id: %d\n", coredump->adev->pdev->revision);
+ drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+ drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+ drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id);
+
+ /* Memory Information */
+ drm_printf(&p, "\nSOC Memory Information\n");
+ drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size);
+ drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size);
+ drm_printf(&p, "gtt size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size);
+
+ /* GDS Config */
+ drm_printf(&p, "\nGDS Config\n");
+ drm_printf(&p, "gds: total size: %d\n", coredump->adev->gds.gds_size);
+ drm_printf(&p, "gds: compute partition size: %d\n", coredump->adev->gds.gds_size);
+ drm_printf(&p, "gds: gws per compute partition: %d\n", coredump->adev->gds.gws_size);
+ drm_printf(&p, "gds: os per compute partition: %d\n", coredump->adev->gds.oa_size);
+
+ /* HWIP Version Information */
+ drm_printf(&p, "\nHW IP Version Information\n");
+ for (int i = 1; i < MAX_HWIP; i++) {
+ for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
+ ver = coredump->adev->ip_versions[i][j];
+ if (ver)
+ drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n",
+ hw_ip_names[i], i, j,
+ IP_VERSION_MAJ(ver),
+ IP_VERSION_MIN(ver),
+ IP_VERSION_REV(ver),
+ IP_VERSION_VARIANT(ver),
+ IP_VERSION_SUBREV(ver));
+ }
+ }
+
+ /* IP firmware information */
+ drm_printf(&p, "\nIP Firmwares\n");
+ amdgpu_devcoredump_fw_info(coredump->adev, &p);
+
+ if (coredump->ring) {
+ drm_printf(&p, "\nRing timed out details\n");
+ drm_printf(&p, "IP Type: %d Ring Name: %s\n",
+ coredump->ring->funcs->type,
+ coredump->ring->name);
+ }
+
+ /* Add page fault information */
+ fault_info = &coredump->adev->vm_manager.fault_info;
+ drm_printf(&p, "\n[%s] Page fault observed\n",
+ fault_info->vmhub ? "mmhub" : "gfxhub");
+ drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr);
+ drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status);
+
+ /* dump the ip state for each ip */
+ drm_printf(&p, "IP Dump\n");
+ for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
+ ip_block = &coredump->adev->ip_blocks[i];
+ if (ip_block->version->funcs->print_ip_state) {
+ drm_printf(&p, "IP: %s\n", ip_block->version->funcs->name);
+ ip_block->version->funcs->print_ip_state(ip_block, &p);
+ drm_printf(&p, "\n");
+ }
+ }
+
+ /* Add ring buffer information */
+ drm_printf(&p, "Ring buffer information\n");
+ for (int i = 0; i < coredump->adev->num_rings; i++) {
+ int j = 0;
+ struct amdgpu_ring *ring = coredump->adev->rings[i];
+
+ drm_printf(&p, "ring name: %s\n", ring->name);
+ drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
+ amdgpu_ring_get_rptr(ring),
+ amdgpu_ring_get_wptr(ring),
+ ring->buf_mask);
+ drm_printf(&p, "Ring size in dwords: %d\n",
+ ring->ring_size / 4);
+ drm_printf(&p, "Ring contents\n");
+ drm_printf(&p, "Offset \t Value\n");
+
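+ /* ring->ring_size is in bytes while ring->ring is a dword array, hence the j / 4 indexing */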
+ while (j < ring->ring_size) {
+ drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]);
+ j += 4;
+ }
+ }
+
+ if (coredump->skip_vram_check)
+ drm_printf(&p, "VRAM lost check is skipped!\n");
+ else if (coredump->reset_vram_lost)
+ drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+
+ return count - iter.remain;
+}
+
+static void amdgpu_devcoredump_free(void *data)
+{
+ kfree(data);
+}
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct amdgpu_coredump_info *coredump;
+ struct drm_sched_job *s_job;
+
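+ /*
+ * GFP_NOWAIT: this is typically reached from the GPU reset or job
+ * timeout path where sleeping allocations must be avoided; on failure
+ * the coredump is simply skipped.
+ */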
+ coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
+
+ if (!coredump) {
+ DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
+ return;
+ }
+
+ coredump->skip_vram_check = skip_vram_check;
+ coredump->reset_vram_lost = vram_lost;
+
+ if (job && job->pasid) {
+ struct amdgpu_task_info *ti;
+
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+ if (ti) {
+ coredump->reset_task_info = *ti;
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+
+ if (job) {
+ s_job = &job->base;
+ coredump->ring = to_amdgpu_ring(s_job->sched);
+ }
+
+ coredump->adev = adev;
+
+ ktime_get_ts64(&coredump->reset_time);
+
+ dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+ amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+ drm_info(dev, "AMDGPU device coredump file has been created\n");
+ drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+ dev->primary->index);
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
new file mode 100644
index 000000000000..ef9772c6bcc9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DEV_COREDUMP_H__
+#define __AMDGPU_DEV_COREDUMP_H__
+
+#include "amdgpu.h"
+
+#ifdef CONFIG_DEV_COREDUMP
+
+#define AMDGPU_COREDUMP_VERSION "1"
+
+struct amdgpu_coredump_info {
+ struct amdgpu_device *adev;
+ struct amdgpu_task_info reset_task_info;
+ struct timespec64 reset_time;
+ bool skip_vram_check;
+ bool reset_vram_lost;
+ struct amdgpu_ring *ring;
+};
+#endif
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
+ bool vram_lost, struct amdgpu_job *job);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
new file mode 100644
index 000000000000..58c3ffe707d1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -0,0 +1,7837 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include <linux/aperture.h>
+#include <linux/power_supply.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/apple-gmux.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client_event.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/amdgpu_drm.h>
+#include <linux/device.h>
+#include <linux/vgaarb.h>
+#include <linux/vga_switcheroo.h>
+#include <linux/efi.h>
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_i2c.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
+#include "amd_pcie.h"
+#ifdef CONFIG_DRM_AMDGPU_SI
+#include "si.h"
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#include "cik.h"
+#endif
+#include "vi.h"
+#include "soc15.h"
+#include "nv.h"
+#include "bif/bif_4_1_d.h"
+#include <linux/firmware.h>
+#include "amdgpu_vf_error.h"
+
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_pm.h"
+
+#include "amdgpu_xgmi.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_ras_mgr.h"
+#include "amdgpu_pmu.h"
+#include "amdgpu_fru_eeprom.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_virt.h"
+#include "amdgpu_dev_coredump.h"
+
+#include <linux/suspend.h>
+#include <drm/task_barrier.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
+#endif
+
+MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
+
+#define AMDGPU_RESUME_MS 2000
+#define AMDGPU_MAX_RETRY_LIMIT 2
+#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
+#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
+#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
+#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
+
+#define AMDGPU_VBIOS_SKIP (1U << 0)
+#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
+
+static const struct drm_driver amdgpu_kms_driver;
+
+const char *amdgpu_asic_name[] = {
+ "TAHITI",
+ "PITCAIRN",
+ "VERDE",
+ "OLAND",
+ "HAINAN",
+ "BONAIRE",
+ "KAVERI",
+ "KABINI",
+ "HAWAII",
+ "MULLINS",
+ "TOPAZ",
+ "TONGA",
+ "FIJI",
+ "CARRIZO",
+ "STONEY",
+ "POLARIS10",
+ "POLARIS11",
+ "POLARIS12",
+ "VEGAM",
+ "VEGA10",
+ "VEGA12",
+ "VEGA20",
+ "RAVEN",
+ "ARCTURUS",
+ "RENOIR",
+ "ALDEBARAN",
+ "NAVI10",
+ "CYAN_SKILLFISH",
+ "NAVI14",
+ "NAVI12",
+ "SIENNA_CICHLID",
+ "NAVY_FLOUNDER",
+ "VANGOGH",
+ "DIMGREY_CAVEFISH",
+ "BEIGE_GOBY",
+ "YELLOW_CARP",
+ "IP DISCOVERY",
+ "LAST",
+};
+
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
+/*
+ * Default init level where all IP blocks are expected to be initialized. This
+ * is the level used by default and also after a full reset of the device.
+ */
+struct amdgpu_init_level amdgpu_init_default = {
+ .level = AMDGPU_INIT_LEVEL_DEFAULT,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+struct amdgpu_init_level amdgpu_init_recovery = {
+ .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+/*
+ * Minimal set of blocks that need to be initialized before an XGMI hive can be reset. This
+ * is used for cases like reset on initialization where the entire hive needs to
+ * be reset before first use.
+ */
+struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
+ .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ .hwini_ip_block_mask =
+ BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
+ BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
+ BIT(AMD_IP_BLOCK_TYPE_PSP)
+};
+
+static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev);
+static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev);
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev);
+
+static void amdgpu_device_load_switch_state(struct amdgpu_device *adev);
+
+static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
+ enum amd_ip_block_type block)
+{
+ return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
+}
+
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl)
+{
+ switch (lvl) {
+ case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
+ adev->init_lvl = &amdgpu_init_minimal_xgmi;
+ break;
+ case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
+ adev->init_lvl = &amdgpu_init_recovery;
+ break;
+ case AMDGPU_INIT_LEVEL_DEFAULT:
+ fallthrough;
+ default:
+ adev->init_lvl = &amdgpu_init_default;
+ break;
+ }
+}
+
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data);
+
+/**
+ * DOC: pcie_replay_count
+ *
+ * The amdgpu driver provides a sysfs API for reporting the total number
+ * of PCIe replays (NAKs).
+ * The file pcie_replay_count is used for this and returns the total
+ * number of replays as a sum of the NAKs generated and NAKs received.
+ */
+
+static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
+
+ return sysfs_emit(buf, "%llu\n", cnt);
+}
+
+static DEVICE_ATTR(pcie_replay_count, 0444,
+ amdgpu_device_get_pcie_replay_count, NULL);
+
+static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ if (amdgpu_nbio_is_replay_cnt_supported(adev))
+ ret = sysfs_create_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
+
+ return ret;
+}
+
+static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_nbio_is_replay_cnt_supported(adev))
+ sysfs_remove_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
+}
+
+static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
+ const struct bin_attribute *attr, char *buf,
+ loff_t ppos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ ssize_t bytes_read;
+
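+ /*
+ * The file offset selects which register-state dump to return; it is
+ * not a byte offset into a single blob.
+ */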
+ switch (ppos) {
+ case AMDGPU_SYS_REG_STATE_XGMI:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_WAFL:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_PCIE:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
+ break;
+ case AMDGPU_SYS_REG_STATE_USR_1:
+ bytes_read = amdgpu_asic_get_reg_state(
+ adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return bytes_read;
+}
+
+static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+ AMDGPU_SYS_REG_STATE_END);
+
+int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (!amdgpu_asic_get_reg_state_supported(adev))
+ return 0;
+
+ ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+
+ return ret;
+}
+
+void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_asic_get_reg_state_supported(adev))
+ return;
+ sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
+}
+
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->suspend) {
+ r = ip_block->version->funcs->suspend(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "suspend of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = false;
+ return 0;
+}
+
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->resume) {
+ r = ip_block->version->funcs->resume(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = true;
+ return 0;
+}
+
+/**
+ * DOC: board_info
+ *
+ * The amdgpu driver provides a sysfs API for giving board related information.
+ * It provides the form factor information in the format
+ *
+ * type : form factor
+ *
+ * Possible form factor values
+ *
+ * - "cem" - PCIE CEM card
+ * - "oam" - Open Compute Accelerator Module
+ * - "unknown" - Not known
+ *
+ */
+
+static ssize_t amdgpu_device_get_board_info(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
+ const char *pkg;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
+ pkg_type = adev->smuio.funcs->get_pkg_type(adev);
+
+ switch (pkg_type) {
+ case AMDGPU_PKG_TYPE_CEM:
+ pkg = "cem";
+ break;
+ case AMDGPU_PKG_TYPE_OAM:
+ pkg = "oam";
+ break;
+ default:
+ pkg = "unknown";
+ break;
+ }
+
+ return sysfs_emit(buf, "%s : %s\n", "type", pkg);
+}
+
+static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
+
+static struct attribute *amdgpu_board_attrs[] = {
+ &dev_attr_board_info.attr,
+ NULL,
+};
+
+static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group amdgpu_board_attrs_group = {
+ .attrs = amdgpu_board_attrs,
+ .is_visible = amdgpu_board_attrs_is_visible
+};
+
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+
+/**
+ * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true if the device is a dGPU with ATPX power control,
+ * otherwise returns false.
+ */
+bool amdgpu_device_supports_px(struct amdgpu_device *adev)
+{
+ if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
+ return true;
+ return false;
+}
+
+/**
+ * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true if the device is a dGPU with ACPI power control,
+ * otherwise returns false.
+ */
+bool amdgpu_device_supports_boco(struct amdgpu_device *adev)
+{
+ if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ return false;
+
+ if (adev->has_pr3 ||
+ ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
+ return true;
+ return false;
+}
+
+/**
+ * amdgpu_device_supports_baco - Does the device support BACO
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Return:
+ * 1 if the device supports BACO;
+ * 3 if the device supports MACO (only works if BACO is supported)
+ * otherwise return 0.
+ */
+int amdgpu_device_supports_baco(struct amdgpu_device *adev)
+{
+ return amdgpu_asic_supports_baco(adev);
+}
+
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
+{
+ int bamaco_support;
+
+ adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
+ bamaco_support = amdgpu_device_supports_baco(adev);
+
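+ /*
+ * amdgpu_runtime_pm: 2 forces BAMACO, 1 forces BACO, 0 disables runtime
+ * pm, negative values pick a mode automatically below.
+ */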
+ switch (amdgpu_runtime_pm) {
+ case 2:
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
+ } else if (bamaco_support == BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
+ }
+ break;
+ case 1:
+ if (bamaco_support & BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Forcing BACO for runtime pm\n");
+ }
+ break;
+ case -1:
+ case -2:
+ if (amdgpu_device_supports_px(adev)) {
+ /* enable PX as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
+ dev_info(adev->dev, "Using ATPX for runtime pm\n");
+ } else if (amdgpu_device_supports_boco(adev)) {
+ /* enable boco as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
+ dev_info(adev->dev, "Using BOCO for runtime pm\n");
+ } else {
+ if (!bamaco_support)
+ goto no_runtime_pm;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ /* BACO is not supported on vega20 and arcturus */
+ break;
+ case CHIP_VEGA10:
+ /* enable BACO as runpm mode if noretry=0 */
+ if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ default:
+ /* enable BACO as runpm mode on CI+ */
+ if (!amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ }
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Using BAMACO for runtime pm\n");
+ } else {
+ dev_info(adev->dev, "Using BACO for runtime pm\n");
+ }
+ }
+ }
+ break;
+ case 0:
+ dev_info(adev->dev, "runtime pm is manually disabled\n");
+ break;
+ default:
+ break;
+ }
+
+no_runtime_pm:
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
+ dev_info(adev->dev, "Runtime PM not available\n");
+}
+/**
+ * amdgpu_device_supports_smart_shift - Is the device dGPU with
+ * smart shift support
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns true if the device is a dGPU with Smart Shift support,
+ * otherwise returns false.
+ */
+bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev)
+{
+ return (amdgpu_device_supports_boco(adev) &&
+ amdgpu_acpi_is_power_shift_control_supported());
+}
+
+/*
+ * VRAM access helper functions
+ */
+
+/**
+ * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, the buffer at @buf must be at least @size bytes
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ unsigned long flags;
+ uint32_t hi = ~0, tmp = 0;
+ uint32_t *data = buf;
+ uint64_t last;
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
+
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
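+ /*
+ * MM_INDEX/MM_DATA provide a dword-wide indirect window into VRAM:
+ * program the low address bits (with the top bit set) into MM_INDEX and
+ * the upper bits into MM_INDEX_HI, then transfer one dword at a time
+ * through MM_DATA.
+ */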
+ for (last = pos + size; pos < last; pos += 4) {
+ tmp = pos >> 31;
+
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+ if (tmp != hi) {
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ hi = tmp;
+ }
+ if (write)
+ WREG32_NO_KIQ(mmMM_DATA, *data++);
+ else
+ *data++ = RREG32_NO_KIQ(mmMM_DATA);
+ }
+
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_device_aper_access - access vram by vram aperture
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, the buffer at @buf must be at least @size bytes
+ * @write: true - write to vram, otherwise - read from vram
+ *
+ * Returns the number of bytes actually transferred.
+ */
+size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+#ifdef CONFIG_64BIT
+ void __iomem *addr;
+ size_t count = 0;
+ uint64_t last;
+
+ if (!adev->mman.aper_base_kaddr)
+ return 0;
+
+ last = min(pos + size, adev->gmc.visible_vram_size);
+ if (last > pos) {
+ addr = adev->mman.aper_base_kaddr + pos;
+ count = last - pos;
+
+ if (write) {
+ memcpy_toio(addr, buf, count);
+ /* Make sure the HDP write cache flush happens without any reordering
+ * after the system memory contents have been sent over PCIe to the device
+ */
+ mb();
+ amdgpu_device_flush_hdp(adev, NULL);
+ } else {
+ amdgpu_device_invalidate_hdp(adev, NULL);
+ /* Make sure HDP read cache is invalidated before issuing a read
+ * to the PCIe device
+ */
+ mb();
+ memcpy_fromio(buf, addr, count);
+ }
+
+ }
+
+ return count;
+#else
+ return 0;
+#endif
+}
+
+/**
+ * amdgpu_device_vram_access - read/write a buffer in vram
+ *
+ * @adev: amdgpu_device pointer
+ * @pos: offset of the buffer in vram
+ * @buf: virtual address of the buffer in system memory
+ * @size: read/write size, the buffer at @buf must be at least @size bytes
+ * @write: true - write to vram, otherwise - read from vram
+ */
+void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
+ size_t count;
+
+ /* try using the vram aperture to access vram first */
+ count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+ size -= count;
+ if (size) {
+ /* use MM_INDEX/MM_DATA to access the rest of vram */
+ pos += count;
+ buf += count;
+ amdgpu_device_mm_access(adev, pos, buf, size, write);
+ }
+}
+
+/*
+ * register access helper functions.
+ */
+
+/* Check if hw access should be skipped because of hotplug or device error */
+bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
+{
+ if (adev->no_hw_access)
+ return true;
+
+#ifdef CONFIG_LOCKDEP
+ /*
+ * This is a bit complicated to understand, so worth a comment. What we assert
+ * here is that the GPU reset is not running on another thread in parallel.
+ *
+ * For this we trylock the read side of the reset semaphore, if that succeeds
+ * we know that the reset is not running in parallel.
+ *
+ * If the trylock fails we assert that we are either already holding the read
+ * side of the lock or are the reset thread itself and hold the write side of
+ * the lock.
+ */
+ if (in_task()) {
+ if (down_read_trylock(&adev->reset_domain->sem))
+ up_read(&adev->reset_domain->sem);
+ else
+ lockdep_assert_held(&adev->reset_domain->sem);
+ }
+#endif
+ return false;
+}
+
+/**
+ * amdgpu_device_rreg - read a memory mapped IO or indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
+uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags)
+{
+ uint32_t ret;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg, 0);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ ret = adev->pcie_rreg(adev, reg * 4);
+ }
+
+ trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
+
+ return ret;
+}
+
+/*
+ * Byte-granular MMIO register read helpers
+ * @offset: byte offset from MMIO start
+ */
+
+/**
+ * amdgpu_mm_rreg8 - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ *
+ * Returns the 8 bit value from the offset specified.
+ */
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (offset < adev->rmmio_size)
+ return (readb(adev->rmmio + offset));
+ BUG();
+}
+
+/**
+ * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
+uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t acc_flags,
+ uint32_t xcc_id)
+{
+ uint32_t ret, rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, false,
+ &rlcg_flag)) {
+ ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ ret = adev->pcie_rreg(adev, reg * 4);
+ }
+
+ return ret;
+}
+
+/*
+ * Byte-granular MMIO register write helpers
+ * @offset: byte offset from MMIO start
+ * @value: the value to be written to the register
+ */
+
+/**
+ * amdgpu_mm_wreg8 - write a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ * @value: 8 bit value to write
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (offset < adev->rmmio_size)
+ writeb(value, adev->rmmio + offset);
+ else
+ BUG();
+}
+
+/**
+ * amdgpu_device_wreg - write to a memory mapped IO or indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_device_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, 0);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ adev->pcie_wreg(adev, reg * 4, v);
+ }
+
+ trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
+}
+
+/**
+ * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: mmio/rlc register
+ * @v: value to write
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * This function is invoked only for debugfs register access.
+ */
+void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t xcc_id)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (amdgpu_sriov_fullaccess(adev) &&
+ adev->gfx.rlc.funcs &&
+ adev->gfx.rlc.funcs->is_rlcg_access_range) {
+ if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
+ return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
+ } else if ((reg * 4) >= adev->rmmio_size) {
+ adev->pcie_wreg(adev, reg * 4, v);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+}
+
+/**
+ * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t v,
+ uint32_t acc_flags, uint32_t xcc_id)
+{
+ uint32_t rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if ((reg * 4) < adev->rmmio_size) {
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ adev->gfx.rlc.rlcg_reg_access_supported &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
+ GC_HWIP, true,
+ &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
+ } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
+ amdgpu_sriov_runtime(adev) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_kiq_wreg(adev, reg, v, xcc_id);
+ up_read(&adev->reset_domain->sem);
+ } else {
+ writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
+ }
+ } else {
+ adev->pcie_wreg(adev, reg * 4, v);
+ }
+}
+
+/**
+ * amdgpu_device_indirect_rreg - read an indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register address to read from
+ *
+ * Returns the value of indirect register @reg_addr
+ */
+u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
+ u32 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+ u32 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
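+
+/*
+ * Illustrative sketch only: on ASICs that wire adev->pcie_rreg to this
+ * helper, a register whose byte offset falls outside the MMIO aperture is
+ * read through the PCIE index/data window:
+ *
+ *   amdgpu_device_rreg(adev, reg, 0)
+ *     -> (reg * 4) >= adev->rmmio_size
+ *     -> adev->pcie_rreg(adev, reg * 4)
+ *     -> amdgpu_device_indirect_rreg(adev, reg_addr)
+ *
+ * The helper writes the target address to the index register, reads it back
+ * to post the write, then reads the value from the data register, all under
+ * pcie_idx_lock so concurrent users cannot interleave index/data accesses.
+ */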
+
+u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ u32 r;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ if (unlikely(!adev->nbio.funcs)) {
+ pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
+ pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
+ } else {
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ }
+
+ if (reg_addr >> 32) {
+ if (unlikely(!adev->nbio.funcs))
+ pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
+ else
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ } else {
+ pcie_index_hi = 0;
+ }
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r = readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
+/**
+ * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register address to read from
+ *
+ * Returns the value of indirect register @reg_addr
+ */
+u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
+ u32 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ /* read low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ r |= ((u64)readl(pcie_data_offset) << 32);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
+u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+ u64 r;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /* read low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r = readl(pcie_data_offset);
+ /* read high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ r |= ((u64)readl(pcie_data_offset) << 32);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return r;
+}
+
+/**
+ * amdgpu_device_indirect_wreg - write an indirect register address
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register offset
+ * @reg_data: indirect register data
+ *
+ */
+void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
+ u32 reg_addr, u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u32 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+ else
+ pcie_index_hi = 0;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel(reg_data, pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_indirect_wreg64 - write to a 64 bit indirect register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg_addr: indirect register offset
+ * @reg_data: indirect register data
+ *
+ */
+void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
+ u32 reg_addr, u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
+ u64 reg_addr, u64 reg_data)
+{
+ unsigned long flags, pcie_index, pcie_data;
+ unsigned long pcie_index_hi = 0;
+ void __iomem *pcie_index_offset;
+ void __iomem *pcie_index_hi_offset;
+ void __iomem *pcie_data_offset;
+
+ pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
+ pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
+ if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
+ pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
+ pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
+ if (pcie_index_hi != 0)
+ pcie_index_hi_offset = (void __iomem *)adev->rmmio +
+ pcie_index_hi * 4;
+
+ /* write low 32 bits */
+ writel(reg_addr, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
+ readl(pcie_data_offset);
+ /* write high 32 bits */
+ writel(reg_addr + 4, pcie_index_offset);
+ readl(pcie_index_offset);
+ if (pcie_index_hi != 0) {
+ writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+ writel((u32)(reg_data >> 32), pcie_data_offset);
+ readl(pcie_data_offset);
+
+ /* clear the high bits */
+ if (pcie_index_hi != 0) {
+ writel(0, pcie_index_hi_offset);
+ readl(pcie_index_hi_offset);
+ }
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_get_rev_id - query device rev_id
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return device rev_id
+ */
+u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_rev_id(adev);
+}
+
+/**
+ * amdgpu_invalid_rreg - dummy reg read function
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read register 0x%04X\n", reg);
+ BUG();
+ return 0;
+}
+
+static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
+ BUG();
+ return 0;
+}
+
+/**
+ * amdgpu_invalid_wreg - dummy reg write function
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register write function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write register 0x%04X with 0x%08X\n", reg,
+ v);
+ BUG();
+}
+
+static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write register 0x%llX with 0x%08X\n", reg,
+ v);
+ BUG();
+}
+
+/**
+ * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read 64 bit register 0x%04X\n",
+ reg);
+ BUG();
+ return 0;
+}
+
+static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
+{
+ dev_err(adev->dev, "Invalid callback to read register 0x%llX\n", reg);
+ BUG();
+ return 0;
+}
+
+/**
+ * amdgpu_invalid_wreg64 - dummy reg write function
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register write function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
+static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
+{
+ dev_err(adev->dev,
+ "Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
+ reg, v);
+ BUG();
+}
+
+/**
+ * amdgpu_block_invalid_rreg - dummy reg read function
+ *
+ * @adev: amdgpu_device pointer
+ * @block: offset of instance
+ * @reg: offset of register
+ *
+ * Dummy register read function. Used for register blocks
+ * that certain asics don't have (all asics).
+ * Returns the value in the register.
+ */
+static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
+ uint32_t block, uint32_t reg)
+{
+ dev_err(adev->dev,
+ "Invalid callback to read register 0x%04X in block 0x%04X\n",
+ reg, block);
+ BUG();
+ return 0;
+}
+
+/**
+ * amdgpu_block_invalid_wreg - dummy reg write function
+ *
+ * @adev: amdgpu_device pointer
+ * @block: offset of instance
+ * @reg: offset of register
+ * @v: value to write to the register
+ *
+ * Dummy register write function. Used for register blocks
+ * that certain asics don't have (all asics).
+ */
+static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
+ uint32_t block,
+ uint32_t reg, uint32_t v)
+{
+ dev_err(adev->dev,
+ "Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
+ reg, block, v);
+ BUG();
+}
+
+static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
+{
+ if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
+ return AMDGPU_VBIOS_SKIP;
+
+ if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
+ return AMDGPU_VBIOS_OPTIONAL;
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_asic_init - Wrapper for atom asic_init
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Does any asic specific work and then calls atom asic init.
+ */
+static int amdgpu_device_asic_init(struct amdgpu_device *adev)
+{
+ uint32_t flags;
+ bool optional;
+ int ret;
+
+ amdgpu_asic_pre_asic_init(adev);
+ flags = amdgpu_device_get_vbios_flags(adev);
+ optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
+ amdgpu_psp_wait_for_bootloader(adev);
+ if (optional && !adev->bios)
+ return 0;
+
+ ret = amdgpu_atomfirmware_asic_init(adev, true);
+ return ret;
+ } else {
+ if (optional && !adev->bios)
+ return 0;
+
+ return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocates a scratch page of VRAM for use by various things in the
+ * driver.
+ */
+static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
+{
+ return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mem_scratch.robj,
+ &adev->mem_scratch.gpu_addr,
+ (void **)&adev->mem_scratch.ptr);
+}
+
+/**
+ * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the VRAM scratch page.
+ */
+static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
+}
+
+/**
+ * amdgpu_device_program_register_sequence - program an array of registers.
+ *
+ * @adev: amdgpu_device pointer
+ * @registers: pointer to the register array
+ * @array_size: size of the register array
+ *
+ * Programs an array of registers with AND/OR masks.
+ * This is a helper for setting golden registers.
+ */
+void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
+ const u32 *registers,
+ const u32 array_size)
+{
+ u32 tmp, reg, and_mask, or_mask;
+ int i;
+
+ if (array_size % 3)
+ return;
+
+ for (i = 0; i < array_size; i += 3) {
+ reg = registers[i + 0];
+ and_mask = registers[i + 1];
+ or_mask = registers[i + 2];
+
+ if (and_mask == 0xffffffff) {
+ tmp = or_mask;
+ } else {
+ tmp = RREG32(reg);
+ tmp &= ~and_mask;
+ if (adev->family >= AMDGPU_FAMILY_AI)
+ tmp |= (or_mask & and_mask);
+ else
+ tmp |= or_mask;
+ }
+ WREG32(reg, tmp);
+ }
+}
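+
+/*
+ * Sketch of the expected array layout (the register names below are
+ * placeholders, not real golden settings). Entries come in
+ * {offset, and_mask, or_mask} triplets:
+ *
+ *   static const u32 example_golden_settings[] = {
+ *       // offset         and_mask     or_mask
+ *       mmEXAMPLE_REG_A,  0xffffffff,  0x00000242,  // full replace
+ *       mmEXAMPLE_REG_B,  0x0000ff0f,  0x00000100,  // read-modify-write
+ *   };
+ *
+ *   amdgpu_device_program_register_sequence(adev, example_golden_settings,
+ *                                           ARRAY_SIZE(example_golden_settings));
+ */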
+
+/**
+ * amdgpu_device_pci_config_reset - reset the GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Resets the GPU using the pci config reset sequence.
+ * Only applicable to asics prior to vega10.
+ */
+void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
+{
+ pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
+}
+
+/**
+ * amdgpu_device_pci_reset - reset the GPU using generic PCI means
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
+ */
+int amdgpu_device_pci_reset(struct amdgpu_device *adev)
+{
+ return pci_reset_function(adev->pdev);
+}
+
+/*
+ * amdgpu_device_wb_*()
+ * Writeback is the method by which the GPU updates special pages in memory
+ * with the status of certain GPU events (fences, ring pointers, etc.).
+ */
+
+/**
+ * amdgpu_device_wb_fini - Disable Writeback and free memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disables Writeback and frees the Writeback memory (all asics).
+ * Used at driver shutdown.
+ */
+static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
+{
+ if (adev->wb.wb_obj) {
+ amdgpu_bo_free_kernel(&adev->wb.wb_obj,
+ &adev->wb.gpu_addr,
+ (void **)&adev->wb.wb);
+ adev->wb.wb_obj = NULL;
+ }
+}
+
+/**
+ * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initializes writeback and allocates writeback memory (all asics).
+ * Used at driver startup.
+ * Returns 0 on success or a negative error code on failure.
+ */
+static int amdgpu_device_wb_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->wb.wb_obj == NULL) {
+ /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->wb.wb_obj, &adev->wb.gpu_addr,
+ (void **)&adev->wb.wb);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
+ return r;
+ }
+
+ adev->wb.num_wb = AMDGPU_MAX_WB;
+ memset(&adev->wb.used, 0, sizeof(adev->wb.used));
+
+ /* clear wb memory */
+ memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_wb_get - Allocate a wb entry
+ *
+ * @adev: amdgpu_device pointer
+ * @wb: wb index
+ *
+ * Allocate a wb slot for use by the driver (all asics).
+ * Returns 0 on success or -EINVAL on failure.
+ */
+int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
+{
+ unsigned long flags, offset;
+
+ spin_lock_irqsave(&adev->wb.lock, flags);
+ offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
+ if (offset < adev->wb.num_wb) {
+ __set_bit(offset, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
+ *wb = offset << 3; /* convert to dw offset */
+ return 0;
+ } else {
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
+ return -EINVAL;
+ }
+}
+
+/**
+ * amdgpu_device_wb_free - Free a wb entry
+ *
+ * @adev: amdgpu_device pointer
+ * @wb: wb index
+ *
+ * Free a wb slot allocated for use by the driver (all asics)
+ */
+void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
+{
+ unsigned long flags;
+
+ wb >>= 3;
+ spin_lock_irqsave(&adev->wb.lock, flags);
+ if (wb < adev->wb.num_wb)
+ __clear_bit(wb, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
+}
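+
+/*
+ * Sketch of typical writeback slot usage (illustrative, error handling
+ * trimmed). The index returned by amdgpu_device_wb_get() is already a dword
+ * offset, so it indexes adev->wb.wb[] directly and converts to a GPU address
+ * by scaling with sizeof(u32):
+ *
+ *   u32 wb_idx;
+ *
+ *   if (!amdgpu_device_wb_get(adev, &wb_idx)) {
+ *       u64 wb_gpu_addr = adev->wb.gpu_addr + wb_idx * 4;
+ *
+ *       adev->wb.wb[wb_idx] = 0;            // CPU view of the slot
+ *       // ... hand wb_gpu_addr to the ring/fence code ...
+ *       amdgpu_device_wb_free(adev, wb_idx);
+ *   }
+ */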
+
+/**
+ * amdgpu_device_resize_fb_bar - try to resize FB BAR
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Try to resize the FB BAR to make all VRAM CPU accessible. We try very hard
+ * not to fail, but if any of the BARs is not accessible after the resize we
+ * abort driver loading by returning -ENODEV.
+ */
+int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
+{
+ int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
+ struct pci_bus *root;
+ struct resource *res;
+ int max_size, r;
+ unsigned int i;
+ u16 cmd;
+
+ if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+ return 0;
+
+ /* Bypass for VF */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (!amdgpu_rebar)
+ return 0;
+
+ /* resizing on Dell G5 SE platforms causes problems with runtime pm */
+ if ((amdgpu_runtime_pm != 0) &&
+ adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
+ adev->pdev->device == 0x731f &&
+ adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
+ return 0;
+
+ /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
+ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
+ dev_warn(
+ adev->dev,
+ "System can't access extended configuration space, please check!!\n");
+
+ /* skip if the bios has already enabled large BAR */
+ if (adev->gmc.real_vram_size &&
+ (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
+ return 0;
+
+ /* Check if the root BUS has 64bit memory resources */
+ root = adev->pdev->bus;
+ while (root->parent)
+ root = root->parent;
+
+ pci_bus_for_each_resource(root, res, i) {
+ if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
+ res->start > 0x100000000ull)
+ break;
+ }
+
+ /* Trying to resize is pointless without a root hub window above 4GB */
+ if (!res)
+ return 0;
+
+ /* Limit the BAR size to what is available */
+ max_size = pci_rebar_get_max_size(adev->pdev, 0);
+ if (max_size < 0)
+ return 0;
+ rbar_size = min(max_size, rbar_size);
+
+ /* Disable memory decoding while we change the BAR addresses and size */
+ pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
+ pci_write_config_word(adev->pdev, PCI_COMMAND,
+ cmd & ~PCI_COMMAND_MEMORY);
+
+ /* Tear down doorbell as resizing will release BARs */
+ amdgpu_doorbell_fini(adev);
+
+ r = pci_resize_resource(adev->pdev, 0, rbar_size,
+ (adev->asic_type >= CHIP_BONAIRE) ? 1 << 5
+ : 1 << 2);
+ if (r == -ENOSPC)
+ dev_info(adev->dev,
+ "Not enough PCI address space for a large BAR.");
+ else if (r && r != -ENOTSUPP)
+ dev_err(adev->dev, "Problem resizing BAR0 (%d).", r);
+
+ /* When the doorbell or fb BAR isn't available we have no chance of
+ * using the device.
+ */
+ r = amdgpu_doorbell_init(adev);
+ if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
+ return -ENODEV;
+
+ pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
+
+ return 0;
+}
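+
+/*
+ * Assuming the standard PCI resizable BAR encoding (BAR size =
+ * 2^(size + 20) bytes), pci_rebar_bytes_to_size() maps the VRAM size to the
+ * rbar_size value used above, for example:
+ *
+ *    4 GiB VRAM -> size 12
+ *    8 GiB VRAM -> size 13
+ *   16 GiB VRAM -> size 14
+ */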
+
+/*
+ * GPU helpers function.
+ */
+/**
+ * amdgpu_device_need_post - check if the hw needs post or not
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Check whether the asic needs to be posted (all asics), either at driver
+ * startup or after a hw reset has been performed.
+ * Returns true if post is needed, false if not.
+ */
+bool amdgpu_device_need_post(struct amdgpu_device *adev)
+{
+ uint32_t reg, flags;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+ flags = amdgpu_device_get_vbios_flags(adev);
+ if (flags & AMDGPU_VBIOS_SKIP)
+ return false;
+ if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
+ return false;
+
+ if (amdgpu_passthrough(adev)) {
+ /* For FIJI: in the whole-GPU pass-through virtualization case, after a VM
+ * reboot some old SMC firmware still needs the driver to do vPost, otherwise
+ * the GPU hangs. SMC firmware versions above 22.15 don't have this flaw, so
+ * force vPost for SMC versions below 22.15.
+ */
+ if (adev->asic_type == CHIP_FIJI) {
+ int err;
+ uint32_t fw_ver;
+
+ err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
+ /* force vPost if error occurred */
+ if (err)
+ return true;
+
+ fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
+ release_firmware(adev->pm.fw);
+ if (fw_ver < 0x00160e00)
+ return true;
+ }
+ }
+
+ /* Don't post if we need to reset whole hive on init */
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ return false;
+
+ if (adev->has_hw_reset) {
+ adev->has_hw_reset = false;
+ return true;
+ }
+
+ /* bios scratch used on CIK+ */
+ if (adev->asic_type >= CHIP_BONAIRE)
+ return amdgpu_atombios_scratch_need_asic_init(adev);
+
+ /* check MEM_SIZE for older asics */
+ reg = amdgpu_asic_get_config_memsize(adev);
+
+ if ((reg != 0) && (reg != 0xffffffff))
+ return false;
+
+ return true;
+}
+
+/*
+ * Check whether seamless boot is supported.
+ *
+ * So far we only support seamless boot on DCE 3.0 or later.
+ * If users report that it works on older ASICs as well, we may
+ * loosen this.
+ */
+bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
+{
+ switch (amdgpu_seamless) {
+ case -1:
+ break;
+ case 1:
+ return true;
+ case 0:
+ return false;
+ default:
+ dev_err(adev->dev, "Invalid value for amdgpu.seamless: %d\n",
+ amdgpu_seamless);
+ return false;
+ }
+
+ if (!(adev->flags & AMD_IS_APU))
+ return false;
+
+ if (adev->mman.keep_stolen_vga_memory)
+ return false;
+
+ return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
+}
+
+/*
+ * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
+ * don't support dynamic speed switching. Until we have confirmation from Intel
+ * that a specific host supports it, it's safer to keep it disabled for all.
+ *
+ * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
+ */
+static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ /* eGPU change speeds based on USB4 fabric conditions */
+ if (dev_is_removable(adev->dev))
+ return true;
+
+ if (c->x86_vendor == X86_VENDOR_INTEL)
+ return false;
+#endif
+ return true;
+}
+
+static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
+{
+ /* Enabling ASPM causes random hangs on Tahiti and Oland on Zen4.
+ * It's unclear if this is a platform-specific or GPU-specific issue.
+ * Disable ASPM on SI for the time being.
+ */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ return true;
+
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
+ return false;
+
+ if (c->x86 == 6 &&
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
+ switch (c->x86_model) {
+ case VFM_MODEL(INTEL_ALDERLAKE):
+ case VFM_MODEL(INTEL_ALDERLAKE_L):
+ case VFM_MODEL(INTEL_RAPTORLAKE):
+ case VFM_MODEL(INTEL_RAPTORLAKE_P):
+ case VFM_MODEL(INTEL_RAPTORLAKE_S):
+ return true;
+ default:
+ return false;
+ }
+ } else {
+ return false;
+ }
+#else
+ return false;
+#endif
+}
+
+/**
+ * amdgpu_device_should_use_aspm - check if the device should program ASPM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Confirm whether the module parameter and pcie bridge agree that ASPM should
+ * be set for this device.
+ *
+ * Returns true if it should be used or false if not.
+ */
+bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
+{
+ switch (amdgpu_aspm) {
+ case -1:
+ break;
+ case 0:
+ return false;
+ case 1:
+ return true;
+ default:
+ return false;
+ }
+ if (adev->flags & AMD_IS_APU)
+ return false;
+ if (amdgpu_device_aspm_support_quirk(adev))
+ return false;
+ return pcie_aspm_enabled(adev->pdev);
+}
+
+/* if we get transitioned to only one device, take VGA back */
+/**
+ * amdgpu_device_vga_set_decode - enable/disable vga decode
+ *
+ * @pdev: PCI device pointer
+ * @state: enable/disable vga decode
+ *
+ * Enable/disable vga decode (all asics).
+ * Returns VGA resource flags.
+ */
+static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
+ bool state)
+{
+ struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
+
+ amdgpu_asic_set_vga_state(adev, state);
+ if (state)
+ return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
+ VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+ else
+ return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+}
+
+/**
+ * amdgpu_device_check_block_size - validate the vm block size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm block size specified via module parameter.
+ * The vm block size defines the number of bits in the page table versus the
+ * page directory: a page is 4KB so we have 12 offset bits, a minimum of 9
+ * bits in the page table, and the remaining bits are in the page directory.
+ */
+static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
+{
+ /* defines number of bits in page table versus page directory,
+ * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
+ * page table and the remaining bits are in the page directory
+ */
+ if (amdgpu_vm_block_size == -1)
+ return;
+
+ if (amdgpu_vm_block_size < 9) {
+ dev_warn(adev->dev, "VM page table size (%d) too small\n",
+ amdgpu_vm_block_size);
+ amdgpu_vm_block_size = -1;
+ }
+}
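+
+/*
+ * A worked example of the block-size math described above: with 4KB pages
+ * (12 offset bits) and amdgpu_vm_block_size = 9, each page table covers
+ * 2^(9 + 12) = 2MB of address space; larger values simply move more
+ * translation bits from the page directory into the page table.
+ */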
+
+/**
+ * amdgpu_device_check_vm_size - validate the vm size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm size in GB specified via module parameter.
+ * The VM size is the size of the GPU virtual memory space in GB.
+ */
+static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
+{
+ /* no need to check the default value */
+ if (amdgpu_vm_size == -1)
+ return;
+
+ if (amdgpu_vm_size < 1) {
+ dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
+ amdgpu_vm_size);
+ amdgpu_vm_size = -1;
+ }
+}
+
+static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
+{
+ struct sysinfo si;
+ bool is_os_64 = (sizeof(void *) == 8);
+ uint64_t total_memory;
+ uint64_t dram_size_seven_GB = 0x1B8000000;
+ uint64_t dram_size_three_GB = 0xB8000000;
+
+ if (amdgpu_smu_memory_pool_size == 0)
+ return;
+
+ if (!is_os_64) {
+ dev_warn(adev->dev, "Not 64-bit OS, feature not supported\n");
+ goto def_value;
+ }
+ si_meminfo(&si);
+ total_memory = (uint64_t)si.totalram * si.mem_unit;
+
+ if ((amdgpu_smu_memory_pool_size == 1) ||
+ (amdgpu_smu_memory_pool_size == 2)) {
+ if (total_memory < dram_size_three_GB)
+ goto def_value1;
+ } else if ((amdgpu_smu_memory_pool_size == 4) ||
+ (amdgpu_smu_memory_pool_size == 8)) {
+ if (total_memory < dram_size_seven_GB)
+ goto def_value1;
+ } else {
+ dev_warn(adev->dev, "Smu memory pool size not supported\n");
+ goto def_value;
+ }
+ adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
+
+ return;
+
+def_value1:
+ dev_warn(adev->dev, "No enough system memory\n");
+def_value:
+ adev->pm.smu_prv_buffer_size = 0;
+}
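+
+/*
+ * Note: the "<< 28" above scales the module parameter in 256MB units, so
+ * amdgpu_smu_memory_pool_size = 1/2/4/8 requests a 256MB/512MB/1GB/2GB SMU
+ * pool, gated on having roughly 3GB (small sizes) or 7GB (large sizes) of
+ * system memory.
+ */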
+
+static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
+{
+ if (!(adev->flags & AMD_IS_APU) ||
+ adev->asic_type < CHIP_RAVEN)
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
+ if (adev->pdev->device == 0x15dd)
+ adev->apu_flags |= AMD_APU_IS_RAVEN;
+ if (adev->pdev->device == 0x15d8)
+ adev->apu_flags |= AMD_APU_IS_PICASSO;
+ break;
+ case CHIP_RENOIR:
+ if ((adev->pdev->device == 0x1636) ||
+ (adev->pdev->device == 0x164c))
+ adev->apu_flags |= AMD_APU_IS_RENOIR;
+ else
+ adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
+ break;
+ case CHIP_VANGOGH:
+ adev->apu_flags |= AMD_APU_IS_VANGOGH;
+ break;
+ case CHIP_YELLOW_CARP:
+ break;
+ case CHIP_CYAN_SKILLFISH:
+ if ((adev->pdev->device == 0x13FE) ||
+ (adev->pdev->device == 0x143F))
+ adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_check_arguments - validate module params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates certain module parameters and updates
+ * the associated values used by the driver (all asics).
+ */
+static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (amdgpu_sched_jobs < 4) {
+ dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
+ amdgpu_sched_jobs);
+ amdgpu_sched_jobs = 4;
+ } else if (!is_power_of_2(amdgpu_sched_jobs)) {
+ dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
+ amdgpu_sched_jobs);
+ amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
+ }
+
+ if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
+ /* gart size must be greater or equal to 32M */
+ dev_warn(adev->dev, "gart size (%d) too small\n",
+ amdgpu_gart_size);
+ amdgpu_gart_size = -1;
+ }
+
+ if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
+ /* gtt size must be greater or equal to 32M */
+ dev_warn(adev->dev, "gtt size (%d) too small\n",
+ amdgpu_gtt_size);
+ amdgpu_gtt_size = -1;
+ }
+
+ /* valid range is between 4 and 9 inclusive */
+ if (amdgpu_vm_fragment_size != -1 &&
+ (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
+ dev_warn(adev->dev, "valid range is between 4 and 9\n");
+ amdgpu_vm_fragment_size = -1;
+ }
+
+ if (amdgpu_sched_hw_submission < 2) {
+ dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
+ amdgpu_sched_hw_submission);
+ amdgpu_sched_hw_submission = 2;
+ } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
+ dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
+ amdgpu_sched_hw_submission);
+ amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
+ }
+
+ if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
+ dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
+ amdgpu_reset_method = -1;
+ }
+
+ amdgpu_device_check_smu_prv_buffer_size(adev);
+
+ amdgpu_device_check_vm_size(adev);
+
+ amdgpu_device_check_block_size(adev);
+
+ adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+
+ for (i = 0; i < MAX_XCP; i++) {
+ switch (amdgpu_enforce_isolation) {
+ case -1:
+ case 0:
+ default:
+ /* disable */
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ /* enable */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ /* enable legacy mode */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ /* enable only process isolation without submitting cleaner shader */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_switcheroo_set_state - set switcheroo state
+ *
+ * @pdev: pci dev pointer
+ * @state: vga_switcheroo state
+ *
+ * Callback for the switcheroo driver. Suspends or resumes
+ * the asics before or after it is powered up using ACPI methods.
+ */
+static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
+ enum vga_switcheroo_state state)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ int r;
+
+ if (amdgpu_device_supports_px(drm_to_adev(dev)) &&
+ state == VGA_SWITCHEROO_OFF)
+ return;
+
+ if (state == VGA_SWITCHEROO_ON) {
+ pr_info("switched on\n");
+ /* don't suspend or resume card normally */
+ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+ pci_set_power_state(pdev, PCI_D0);
+ amdgpu_device_load_pci_state(pdev);
+ r = pci_enable_device(pdev);
+ if (r)
+ dev_warn(&pdev->dev, "pci_enable_device failed (%d)\n",
+ r);
+ amdgpu_device_resume(dev, true);
+
+ dev->switch_power_state = DRM_SWITCH_POWER_ON;
+ } else {
+ dev_info(&pdev->dev, "switched off\n");
+ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+ amdgpu_device_prepare(dev);
+ amdgpu_device_suspend(dev, true);
+ amdgpu_device_cache_pci_state(pdev);
+ /* Shut down the device */
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
+ dev->switch_power_state = DRM_SWITCH_POWER_OFF;
+ }
+}
+
+/**
+ * amdgpu_switcheroo_can_switch - see if switcheroo state can change
+ *
+ * @pdev: pci dev pointer
+ *
+ * Callback for the switcheroo driver. Checks if the switcheroo
+ * state can be changed.
+ * Returns true if the state can be changed, false if not.
+ */
+static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+
+ /*
+ * FIXME: open_count is protected by drm_global_mutex but that would lead to
+ * locking inversion with the driver load path. And the access here is
+ * completely racy anyway. So don't bother with locking for now.
+ */
+ return atomic_read(&dev->open_count) == 0;
+}
+
+static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
+ .set_gpu_state = amdgpu_switcheroo_set_state,
+ .reprobe = NULL,
+ .can_switch = amdgpu_switcheroo_can_switch,
+};
+
+/**
+ * amdgpu_device_ip_set_clockgating_state - set the CG state
+ *
+ * @dev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: clockgating state (gate or ungate)
+ *
+ * Sets the requested clockgating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
+int amdgpu_device_ip_set_clockgating_state(void *dev,
+ enum amd_ip_block_type block_type,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = dev;
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type != block_type)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
+ &adev->ip_blocks[i], state);
+ if (r)
+ dev_err(adev->dev,
+ "set_clockgating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ return r;
+}
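+
+/*
+ * Sketch of a typical caller (the block type is chosen for illustration):
+ *
+ *   amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+ *                                          AMD_CG_STATE_GATE);
+ *
+ * walks every valid GFX IP instance and asks it to gate its clocks; the
+ * matching AMD_CG_STATE_UNGATE call reverses it.
+ */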
+
+/**
+ * amdgpu_device_ip_set_powergating_state - set the PG state
+ *
+ * @dev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: powergating state (gate or ungate)
+ *
+ * Sets the requested powergating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
+int amdgpu_device_ip_set_powergating_state(void *dev,
+ enum amd_ip_block_type block_type,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = dev;
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type != block_type)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(
+ &adev->ip_blocks[i], state);
+ if (r)
+ dev_err(adev->dev,
+ "set_powergating_state of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ return r;
+}
+
+/**
+ * amdgpu_device_ip_get_clockgating_state - get the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: clockgating feature flags
+ *
+ * Walks the list of IPs on the device and updates the clockgating
+ * flags for each IP.
+ * Updates @flags with the feature flags for each hardware IP where
+ * clockgating is enabled.
+ */
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
+ adev->ip_blocks[i].version->funcs->get_clockgating_state(
+ &adev->ip_blocks[i], flags);
+ }
+}
+
+/**
+ * amdgpu_device_ip_wait_for_idle - wait for idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Waits for the requested hardware IP to be idle.
+ * Returns 0 for success or a negative error code on failure.
+ */
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->type == block_type) {
+ if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
+ r = adev->ip_blocks[i].version->funcs->wait_for_idle(
+ &adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+ break;
+ }
+ }
+ return 0;
+
+}
+
+/**
+ * amdgpu_device_ip_is_hw - is the hardware IP enabled
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is enabled or not.
+ * Returns true if the IP is enabled, false if not.
+ */
+bool amdgpu_device_ip_is_hw(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == block_type)
+ return adev->ip_blocks[i].status.hw;
+ }
+ return false;
+}
+
+/**
+ * amdgpu_device_ip_is_valid - is the hardware IP valid
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is valid or not.
+ * Returns true if the IP is valid, false if not.
+ */
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == block_type)
+ return adev->ip_blocks[i].status.valid;
+ }
+ return false;
+
+}
+
+/**
+ * amdgpu_device_ip_get_ip_block - get a hw IP pointer
+ *
+ * @adev: amdgpu_device pointer
+ * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Returns a pointer to the hardware IP block structure
+ * if it exists for the asic, otherwise NULL.
+ */
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+ enum amd_ip_block_type type)
+{
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->type == type)
+ return &adev->ip_blocks[i];
+
+ return NULL;
+}
+
+/**
+ * amdgpu_device_ip_block_version_cmp
+ *
+ * @adev: amdgpu_device pointer
+ * @type: enum amd_ip_block_type
+ * @major: major version
+ * @minor: minor version
+ *
+ * Returns 0 if the IP block version is equal to or greater than the given
+ * version, or 1 if it is smaller or the ip_block doesn't exist.
+ */
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+ enum amd_ip_block_type type,
+ u32 major, u32 minor)
+{
+ struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
+
+ if (ip_block && ((ip_block->version->major > major) ||
+ ((ip_block->version->major == major) &&
+ (ip_block->version->minor >= minor))))
+ return 0;
+
+ return 1;
+}
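+
+/*
+ * Sketch of a typical caller (the block type and version checked here are
+ * made up for illustration):
+ *
+ *   if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC, 7, 1)) {
+ *       // SMU IP block is at least v7.1 on this asic
+ *   }
+ */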
+
+static const char *ip_block_names[] = {
+ [AMD_IP_BLOCK_TYPE_COMMON] = "common",
+ [AMD_IP_BLOCK_TYPE_GMC] = "gmc",
+ [AMD_IP_BLOCK_TYPE_IH] = "ih",
+ [AMD_IP_BLOCK_TYPE_SMC] = "smu",
+ [AMD_IP_BLOCK_TYPE_PSP] = "psp",
+ [AMD_IP_BLOCK_TYPE_DCE] = "dce",
+ [AMD_IP_BLOCK_TYPE_GFX] = "gfx",
+ [AMD_IP_BLOCK_TYPE_SDMA] = "sdma",
+ [AMD_IP_BLOCK_TYPE_UVD] = "uvd",
+ [AMD_IP_BLOCK_TYPE_VCE] = "vce",
+ [AMD_IP_BLOCK_TYPE_ACP] = "acp",
+ [AMD_IP_BLOCK_TYPE_VCN] = "vcn",
+ [AMD_IP_BLOCK_TYPE_MES] = "mes",
+ [AMD_IP_BLOCK_TYPE_JPEG] = "jpeg",
+ [AMD_IP_BLOCK_TYPE_VPE] = "vpe",
+ [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm",
+ [AMD_IP_BLOCK_TYPE_ISP] = "isp",
+ [AMD_IP_BLOCK_TYPE_RAS] = "ras",
+};
+
+static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type)
+{
+ int idx = (int)type;
+
+ return idx < ARRAY_SIZE(ip_block_names) ? ip_block_names[idx] : "unknown";
+}
+
+/**
+ * amdgpu_device_ip_block_add
+ *
+ * @adev: amdgpu_device pointer
+ * @ip_block_version: pointer to the IP to add
+ *
+ * Adds the IP block driver information to the collection of IPs
+ * on the asic.
+ */
+int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
+ const struct amdgpu_ip_block_version *ip_block_version)
+{
+ if (!ip_block_version)
+ return -EINVAL;
+
+ switch (ip_block_version->type) {
+ case AMD_IP_BLOCK_TYPE_VCN:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+ return 0;
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
+ return 0;
+ break;
+ default:
+ break;
+ }
+
+ dev_info(adev->dev, "detected ip block number %d <%s_v%d_%d_%d> (%s)\n",
+ adev->num_ip_blocks,
+ ip_block_name(adev, ip_block_version->type),
+ ip_block_version->major,
+ ip_block_version->minor,
+ ip_block_version->rev,
+ ip_block_version->funcs->name);
+
+ adev->ip_blocks[adev->num_ip_blocks].adev = adev;
+
+ adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_enable_virtual_display - enable virtual display feature
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enables the virtual display feature if the user has enabled it via
+ * the module parameter virtual_display. This feature provides a virtual
+ * display hardware on headless boards or in virtualized environments.
+ * This function parses and validates the configuration string specified by
+ * the user and configures the virtual display configuration (number of
+ * virtual connectors, crtcs, etc.) specified.
+ */
+static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
+{
+ adev->enable_virtual_display = false;
+
+ if (amdgpu_virtual_display) {
+ const char *pci_address_name = pci_name(adev->pdev);
+ char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
+
+ pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
+ pciaddstr_tmp = pciaddstr;
+ while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
+ pciaddname = strsep(&pciaddname_tmp, ",");
+ if (!strcmp("all", pciaddname)
+ || !strcmp(pci_address_name, pciaddname)) {
+ long num_crtc;
+ int res = -1;
+
+ adev->enable_virtual_display = true;
+
+ if (pciaddname_tmp)
+ res = kstrtol(pciaddname_tmp, 10,
+ &num_crtc);
+
+ if (!res) {
+ if (num_crtc < 1)
+ num_crtc = 1;
+ if (num_crtc > 6)
+ num_crtc = 6;
+ adev->mode_info.num_crtc = num_crtc;
+ } else {
+ adev->mode_info.num_crtc = 1;
+ }
+ break;
+ }
+ }
+
+ dev_info(
+ adev->dev,
+ "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
+ amdgpu_virtual_display, pci_address_name,
+ adev->enable_virtual_display, adev->mode_info.num_crtc);
+
+ kfree(pciaddstr);
+ }
+}
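+
+/*
+ * Example of the string parsed above (the PCI address is a placeholder).
+ * Entries are "pci_address,num_crtc" pairs separated by ';', "all" matches
+ * every device, and num_crtc is clamped to the 1..6 range:
+ *
+ *   amdgpu.virtual_display=0000:01:00.0,2
+ *   amdgpu.virtual_display=all,1
+ */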
+
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
+ adev->mode_info.num_crtc = 1;
+ adev->enable_virtual_display = true;
+ dev_info(adev->dev, "virtual_display:%d, num_crtc:%d\n",
+ adev->enable_virtual_display,
+ adev->mode_info.num_crtc);
+ }
+}
+
+/**
+ * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Parses the asic configuration parameters specified in the gpu info
+ * firmware and makes them available to the driver for use in configuring
+ * the asic.
+ * Returns 0 on success or a negative error code on failure.
+ */
+static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ int err;
+ const struct gpu_info_firmware_header_v1_0 *hdr;
+
+ adev->firmware.gpu_info_fw = NULL;
+
+ switch (adev->asic_type) {
+ default:
+ return 0;
+ case CHIP_VEGA10:
+ chip_name = "vega10";
+ break;
+ case CHIP_VEGA12:
+ chip_name = "vega12";
+ break;
+ case CHIP_RAVEN:
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ chip_name = "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ chip_name = "picasso";
+ else
+ chip_name = "raven";
+ break;
+ case CHIP_ARCTURUS:
+ chip_name = "arcturus";
+ break;
+ case CHIP_NAVI12:
+ if (adev->discovery.bin)
+ return 0;
+ chip_name = "navi12";
+ break;
+ case CHIP_CYAN_SKILLFISH:
+ if (adev->discovery.bin)
+ return 0;
+ chip_name = "cyan_skillfish";
+ break;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
+ AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_gpu_info.bin", chip_name);
+ if (err) {
+ dev_err(adev->dev,
+ "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
+ chip_name);
+ goto out;
+ }
+
+ hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
+ amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
+
+ switch (hdr->version_major) {
+ case 1:
+ {
+ const struct gpu_info_firmware_v1_0 *gpu_info_fw =
+ (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ /*
+ * Should be dropped when DAL no longer needs it.
+ */
+ if (adev->asic_type == CHIP_NAVI12)
+ goto parse_soc_bounding_box;
+
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
+ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches =
+ le32_to_cpu(gpu_info_fw->gc_num_tccs);
+ adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd =
+ le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu =
+ le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
+ if (hdr->version_minor >= 1) {
+ const struct gpu_info_firmware_v1_1 *gpu_info_fw =
+ (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ adev->gfx.config.num_sc_per_sh =
+ le32_to_cpu(gpu_info_fw->num_sc_per_sh);
+ adev->gfx.config.num_packer_per_sc =
+ le32_to_cpu(gpu_info_fw->num_packer_per_sc);
+ }
+
+parse_soc_bounding_box:
+ /*
+ * SOC bounding box info is not integrated into the discovery table,
+ * so we always need to parse it from the gpu info firmware if needed.
+ */
+ if (hdr->version_minor == 2) {
+ const struct gpu_info_firmware_v1_2 *gpu_info_fw =
+ (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
+ }
+ break;
+ }
+ default:
+ dev_err(adev->dev,
+ "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
+ err = -EINVAL;
+ goto out;
+ }
+out:
+ return err;
+}
+
+static void amdgpu_uid_init(struct amdgpu_device *adev)
+{
+ /* Initialize the UID for the device */
+ adev->uid_info = kzalloc(sizeof(struct amdgpu_uid), GFP_KERNEL);
+ if (!adev->uid_info) {
+ dev_warn(adev->dev, "Failed to allocate memory for UID\n");
+ return;
+ }
+ adev->uid_info->adev = adev;
+}
+
+static void amdgpu_uid_fini(struct amdgpu_device *adev)
+{
+ /* Free the UID memory */
+ kfree(adev->uid_info);
+ adev->uid_info = NULL;
+}
+
+/**
+ * amdgpu_device_ip_early_init - run early init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Early initialization pass for hardware IPs. The hardware IPs that make
+ * up each asic are discovered and each IP's early_init callback is run. This
+ * is the first stage in initializing the asic.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ip_block *ip_block;
+ struct pci_dev *parent;
+ bool total, skip_bios;
+ uint32_t bios_flags;
+ int i, r;
+
+ amdgpu_device_enable_virtual_display(adev);
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+
+ r = amdgpu_virt_init_critical_region(adev);
+ if (r)
+ return r;
+ }
+
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ adev->family = AMDGPU_FAMILY_SI;
+ r = si_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ if (adev->flags & AMD_IS_APU)
+ adev->family = AMDGPU_FAMILY_KV;
+ else
+ adev->family = AMDGPU_FAMILY_CI;
+
+ r = cik_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
+#endif
+ case CHIP_TOPAZ:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ if (adev->flags & AMD_IS_APU)
+ adev->family = AMDGPU_FAMILY_CZ;
+ else
+ adev->family = AMDGPU_FAMILY_VI;
+
+ r = vi_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
+ default:
+ r = amdgpu_discovery_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
+ }
+
+ /* Check for IP version 9.4.3 with A0 hardware */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
+ !amdgpu_device_get_rev_id(adev)) {
+ dev_err(adev->dev, "Unsupported A0 hardware\n");
+ return -ENODEV; /* device unsupported - no device error */
+ }
+
+ if (amdgpu_has_atpx() &&
+ (amdgpu_is_atpx_hybrid() ||
+ amdgpu_has_atpx_dgpu_power_cntl()) &&
+ ((adev->flags & AMD_IS_APU) == 0) &&
+ !dev_is_removable(&adev->pdev->dev))
+ adev->flags |= AMD_IS_PX;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ parent = pcie_find_root_port(adev->pdev);
+ adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
+ }
+
+ adev->pm.pp_feature = amdgpu_pp_feature_mask;
+ if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
+ adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
+ if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
+ adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
+
+ adev->virt.is_xgmi_node_migrate_enabled = false;
+ if (amdgpu_sriov_vf(adev)) {
+ adev->virt.is_xgmi_node_migrate_enabled =
+ amdgpu_ip_version((adev), GC_HWIP, 0) == IP_VERSION(9, 4, 4);
+ }
+
+ total = true;
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ ip_block = &adev->ip_blocks[i];
+
+ if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
+ dev_warn(adev->dev, "disabled ip block: %d <%s>\n", i,
+ adev->ip_blocks[i].version->funcs->name);
+ adev->ip_blocks[i].status.valid = false;
+ } else if (ip_block->version->funcs->early_init) {
+ r = ip_block->version->funcs->early_init(ip_block);
+ if (r == -ENOENT) {
+ adev->ip_blocks[i].status.valid = false;
+ } else if (r) {
+ dev_err(adev->dev,
+ "early_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ total = false;
+ } else {
+ adev->ip_blocks[i].status.valid = true;
+ }
+ } else {
+ adev->ip_blocks[i].status.valid = true;
+ }
+ /* get the vbios after the asic_funcs are set up */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ r = amdgpu_device_parse_gpu_info_fw(adev);
+ if (r)
+ return r;
+
+ bios_flags = amdgpu_device_get_vbios_flags(adev);
+ skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
+ /* Read BIOS */
+ if (!skip_bios) {
+ bool optional =
+ !!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
+ if (!amdgpu_get_bios(adev) && !optional)
+ return -EINVAL;
+
+ if (optional && !adev->bios)
+ dev_info(
+ adev->dev,
+ "VBIOS image optional, proceeding without VBIOS image");
+
+ if (adev->bios) {
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(
+ adev,
+ AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
+ 0, 0);
+ return r;
+ }
+ }
+ }
+
+ /* get pf2vf msg info at its earliest time */
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_init_data_exchange(adev);
+
+ }
+ }
+ if (!total)
+ return -ENODEV;
+
+ if (adev->gmc.xgmi.supported)
+ amdgpu_xgmi_early_init(adev);
+
+ if (amdgpu_is_multi_aid(adev))
+ amdgpu_uid_init(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (ip_block && ip_block->status.valid)
+ amdgpu_amdkfd_device_probe(adev);
+
+ adev->cg_flags &= amdgpu_cg_mask;
+ adev->pg_flags &= amdgpu_pg_mask;
+
+ return 0;
+}
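Illustrative aside (not part of the patch): amdgpu_ip_block_mask is consumed here as a simple index bitmask, where a cleared bit i marks ip_blocks[i] invalid so every later init pass skips it. A tiny standalone sketch of that selection, with hypothetical block names and an arbitrary example mask:

#include <stdio.h>

/* Hypothetical block names, indexed the same way the driver indexes ip_blocks[]. */
static const char *const blocks[] = { "common", "gmc", "ih", "psp", "gfx", "sdma" };

int main(void)
{
	unsigned int ip_block_mask = 0x3b;	/* example mask: bit 2 cleared */

	for (unsigned int i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		if ((ip_block_mask & (1u << i)) == 0)
			printf("disabled ip block: %u <%s>\n", i, blocks[i]);
		else
			printf("early_init ip block: %u <%s>\n", i, blocks[i]);
	}
	return 0;
}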
+
+static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+ if (adev->ip_blocks[i].status.hw)
+ continue;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+ if (adev->ip_blocks[i].status.hw)
+ continue;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+
+ return 0;
+}
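Illustrative aside (not part of the patch): the two hw_init phases above differ only in their filter. Phase 1 brings up just COMMON and IH (plus PSP under SR-IOV) so firmware loading can run in between, and phase 2 initializes whatever is still down; because both skip blocks whose hw flag is already set, the split stays idempotent. A small sketch of that two-pass selection, with made-up block types and an is_phase1_type() helper that is purely illustrative:

#include <stdbool.h>
#include <stdio.h>

enum blk_type { BLK_COMMON, BLK_IH, BLK_PSP, BLK_GMC, BLK_GFX };

struct blk {
	enum blk_type type;
	bool sw, hw;
};

/* Illustrative stand-in for the phase-1 filter used by the driver. */
static bool is_phase1_type(enum blk_type t, bool sriov)
{
	return t == BLK_COMMON || t == BLK_IH || (sriov && t == BLK_PSP);
}

static void hw_init_pass(struct blk *b, int n, bool phase1, bool sriov)
{
	for (int i = 0; i < n; i++) {
		if (!b[i].sw || b[i].hw)
			continue;
		if (phase1 && !is_phase1_type(b[i].type, sriov))
			continue;
		b[i].hw = true;	/* stands in for funcs->hw_init() */
		printf("%s: hw_init block %d\n", phase1 ? "phase1" : "phase2", i);
	}
}

int main(void)
{
	struct blk b[] = {
		{ BLK_COMMON, true, false }, { BLK_GMC, true, false },
		{ BLK_IH, true, false },     { BLK_PSP, true, false },
		{ BLK_GFX, true, false },
	};

	hw_init_pass(b, 5, true, false);	/* COMMON and IH only */
	/* ... firmware loading happens here in the driver ... */
	hw_init_pass(b, 5, false, false);	/* everything that is left */
	return 0;
}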
+
+static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
+{
+ int r = 0;
+ int i;
+ uint32_t smu_version;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+
+ if (!amdgpu_ip_member_of_hwini(adev,
+ AMD_IP_BLOCK_TYPE_PSP))
+ break;
+
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+
+ /* no need to do the fw loading again if already done */
+ if (adev->ip_blocks[i].status.hw)
+ break;
+
+ if (amdgpu_in_reset(adev) || adev->in_suspend) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ } else {
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i]
+ .version->funcs->name,
+ r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+ break;
+ }
+ }
+
+ if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
+ r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
+
+ return r;
+}
+
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+ struct drm_sched_init_args args = {
+ .ops = &amdgpu_sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .timeout_wq = adev->reset_domain->wq,
+ .dev = adev->dev,
+ };
+ long timeout;
+ int r, i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ /* No need to setup the GPU scheduler for rings that don't need it */
+ if (!ring || ring->no_scheduler)
+ continue;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ timeout = adev->compute_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
+
+ args.timeout = timeout;
+ args.credit_limit = ring->num_hw_submission;
+ args.score = ring->sched_score;
+ args.name = ring->name;
+
+ r = drm_sched_init(&ring->sched, &args);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_uvd_entity_init(adev, ring);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create UVD scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ r = amdgpu_vce_entity_init(adev, ring);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to create VCE scheduling entity on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ }
+
+ if (adev->xcp_mgr)
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ return 0;
+}
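Illustrative aside (not part of the patch): each scheduler's job timeout is chosen per ring type, and anything that is not GFX, compute, or SDMA falls back to the video timeout. A minimal sketch of that selection, with a hypothetical enum and example millisecond values:

#include <stdio.h>

enum ring_type { RING_GFX, RING_COMPUTE, RING_SDMA, RING_VCN_DEC, RING_VCN_ENC };

struct timeouts { long gfx, compute, sdma, video; };

static long ring_timeout(const struct timeouts *t, enum ring_type type)
{
	switch (type) {
	case RING_GFX:     return t->gfx;
	case RING_COMPUTE: return t->compute;
	case RING_SDMA:    return t->sdma;
	default:           return t->video;	/* UVD/VCE/VCN/JPEG and friends */
	}
}

int main(void)
{
	struct timeouts t = { 10000, 60000, 10000, 10000 };	/* example values in ms */

	printf("gfx=%ld vcn=%ld\n",
	       ring_timeout(&t, RING_GFX), ring_timeout(&t, RING_VCN_DEC));
	return 0;
}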
+
+
+/**
+ * amdgpu_device_ip_init - run init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main initialization pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the sw_init and hw_init callbacks
+ * are run. sw_init initializes the software state associated with each IP
+ * and hw_init initializes the hardware associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_init(struct amdgpu_device *adev)
+{
+ bool init_badpage;
+ int i, r;
+
+ r = amdgpu_ras_init(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->sw_init) {
+ r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "sw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ goto init_failed;
+ }
+ }
+ adev->ip_blocks[i].status.sw = true;
+
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ /* need to do common hw init early so everything is set up for gmc */
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev, "hw_init %d failed %d\n", i,
+ r);
+ goto init_failed;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ /* need to do gmc hw init early so we can allocate gpu mem */
+ /* Try to reserve bad pages early */
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_exchange_data(adev);
+
+ r = amdgpu_device_mem_scratch_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_mem_scratch_init failed %d\n",
+ r);
+ goto init_failed;
+ }
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev, "hw_init %d failed %d\n", i,
+ r);
+ goto init_failed;
+ }
+ r = amdgpu_device_wb_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_device_wb_init failed %d\n", r);
+ goto init_failed;
+ }
+ adev->ip_blocks[i].status.hw = true;
+
+ /* right after GMC hw init, we create CSA */
+ if (adev->gfx.mcbp) {
+ r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_CSA_SIZE);
+ if (r) {
+ dev_err(adev->dev,
+ "allocate CSA failed %d\n", r);
+ goto init_failed;
+ }
+ }
+
+ r = amdgpu_seq64_init(adev);
+ if (r) {
+ dev_err(adev->dev, "allocate seq64 failed %d\n",
+ r);
+ goto init_failed;
+ }
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_init_data_exchange(adev);
+
+ r = amdgpu_ib_pool_init(adev);
+ if (r) {
+ dev_err(adev->dev, "IB initialization failed (%d).\n", r);
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
+ goto init_failed;
+ }
+
+ r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init is complete */
+ if (r)
+ goto init_failed;
+
+ r = amdgpu_device_ip_hw_init_phase1(adev);
+ if (r)
+ goto init_failed;
+
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ goto init_failed;
+
+ r = amdgpu_device_ip_hw_init_phase2(adev);
+ if (r)
+ goto init_failed;
+
+ /*
+ * Retired pages will be loaded from eeprom and reserved here. This
+ * should be called after amdgpu_device_ip_hw_init_phase2, since for
+ * some ASICs the RAS EEPROM code relies on the SMU being fully
+ * functional for I2C communication, which is only true at this point.
+ *
+ * amdgpu_ras_recovery_init may fail, but the upper layers only care
+ * about failures caused by a bad gpu state and stop the amdgpu init
+ * process accordingly. For other failure cases, it still releases all
+ * the resources and prints an error message rather than returning a
+ * negative value to the upper level.
+ *
+ * Note: theoretically, this should be called before all vram
+ * allocations to protect retired pages from being abused.
+ */
+ init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ r = amdgpu_ras_recovery_init(adev, init_badpage);
+ if (r)
+ goto init_failed;
+
+ /*
+ * In the case of XGMI, grab an extra reference to the reset domain for
+ * this device.
+ */
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (amdgpu_xgmi_add_device(adev) == 0) {
+ if (!amdgpu_sriov_vf(adev)) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+ if (WARN_ON(!hive)) {
+ r = -ENOENT;
+ goto init_failed;
+ }
+
+ if (!hive->reset_domain ||
+ !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
+ r = -ENOENT;
+ amdgpu_put_xgmi_hive(hive);
+ goto init_failed;
+ }
+
+ /* Drop the early temporary reset domain we created for device */
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = hive->reset_domain;
+ amdgpu_put_xgmi_hive(hive);
+ }
+ }
+ }
+
+ r = amdgpu_device_init_schedulers(adev);
+ if (r)
+ goto init_failed;
+
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
+ /* Don't init kfd if the whole hive needs to be reset during init */
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ kgd2kfd_init_zone_device(adev);
+ amdgpu_amdkfd_device_init(adev);
+ }
+
+ amdgpu_fru_get_product_info(adev);
+
+ if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
+ r = amdgpu_cper_init(adev);
+
+init_failed:
+
+ return r;
+}
+
+/**
+ * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Writes a reset magic value to the gart pointer in VRAM. The driver calls
+ * this function before a GPU reset. If the value is retained after a
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
+ */
+static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
+{
+ memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
+}
+
+/**
+ * amdgpu_device_check_vram_lost - check if vram is valid
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Checks the reset magic value written to the gart pointer in VRAM.
+ * The driver calls this after a GPU reset to see if the contents of
+ * VRAM have been lost or not.
+ * Returns true if vram is lost, false if not.
+ */
+static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
+{
+ if (memcmp(adev->gart.ptr, adev->reset_magic,
+ AMDGPU_RESET_MAGIC_NUM))
+ return true;
+
+ if (!amdgpu_in_reset(adev))
+ return false;
+
+ /*
+ * For all ASICs with baco/mode1 reset, the VRAM is
+ * always assumed to be lost.
+ */
+ switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_LEGACY:
+ case AMD_RESET_METHOD_LINK:
+ case AMD_RESET_METHOD_BACO:
+ case AMD_RESET_METHOD_MODE1:
+ return true;
+ default:
+ return false;
+ }
+}
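Illustrative aside (not part of the patch): the reset magic is a canary, namely a small pattern copied out of the GART page before a reset and compared with memcmp() afterwards, where any mismatch means VRAM contents were lost. A standalone sketch of the same pattern, with an arbitrary canary size and a simulated reset:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MAGIC_NUM 64	/* arbitrary canary size for the sketch */

/* Save a copy of the canary before the reset. */
static void fill_reset_magic(unsigned char *magic, const unsigned char *vram)
{
	memcpy(magic, vram, MAGIC_NUM);
}

/* After the reset, a mismatch means the VRAM contents did not survive. */
static bool vram_lost(const unsigned char *magic, const unsigned char *vram)
{
	return memcmp(vram, magic, MAGIC_NUM) != 0;
}

int main(void)
{
	unsigned char vram[MAGIC_NUM] = "amdgpu-reset-canary";
	unsigned char magic[MAGIC_NUM];

	fill_reset_magic(magic, vram);
	vram[0] ^= 0xff;	/* simulate a reset that clobbered VRAM */
	printf("vram lost: %s\n", vram_lost(magic, vram) ? "yes" : "no");
	return 0;
}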
+
+/**
+ * amdgpu_device_set_cg_state - set clockgating for amdgpu device
+ *
+ * @adev: amdgpu_device pointer
+ * @state: clockgating state (gate or ungate)
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * set_clockgating_state callbacks are run.
+ * During the late init pass this enables clockgating for the hardware IPs;
+ * during the fini or suspend pass it disables clockgating for them.
+ * Returns 0 on success, negative error code on failure.
+ */
+
+int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ int i, j, r;
+
+ if (amdgpu_emu_mode == 1)
+ return 0;
+
+ for (j = 0; j < adev->num_ip_blocks; j++) {
+ i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
+ if (!adev->ip_blocks[i].status.late_initialized)
+ continue;
+ /* skip CG for GFX, SDMA on S0ix */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ /* skip CG for VCE/UVD, it's handled specially */
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
+ adev->ip_blocks[i].version->funcs->set_clockgating_state) {
+ /* enable clockgating to save power */
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
+ state);
+ if (r) {
+ dev_err(adev->dev,
+ "set_clockgating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
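Illustrative aside (not part of the patch): note the index trick in the loop above. When gating, blocks are walked front to back; when ungating, the same loop walks them back to front, so clockgating is torn down in the reverse of the order it was enabled. A minimal sketch of that reversal, with hypothetical block names:

#include <stdbool.h>
#include <stdio.h>

static const char *const blocks[] = { "common", "gmc", "gfx", "sdma" };
#define NBLOCKS 4

static void walk(bool gate)
{
	for (int j = 0; j < NBLOCKS; j++) {
		int i = gate ? j : NBLOCKS - j - 1;

		printf("%s %s\n", gate ? "gate" : "ungate", blocks[i]);
	}
}

int main(void)
{
	walk(true);	/* common, gmc, gfx, sdma */
	walk(false);	/* sdma, gfx, gmc, common */
	return 0;
}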
+
+int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
+ enum amd_powergating_state state)
+{
+ int i, j, r;
+
+ if (amdgpu_emu_mode == 1)
+ return 0;
+
+ for (j = 0; j < adev->num_ip_blocks; j++) {
+ i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
+ if (!adev->ip_blocks[i].status.late_initialized)
+ continue;
+ /* skip PG for GFX, SDMA on S0ix */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+ /* skip PG for VCE/UVD/VPE, it's handled specially */
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VPE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
+ adev->ip_blocks[i].version->funcs->set_powergating_state) {
+ /* enable powergating to save power */
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
+ state);
+ if (r) {
+ dev_err(adev->dev,
+ "set_powergating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ }
+ return 0;
+}
+
+static int amdgpu_device_enable_mgpu_fan_boost(void)
+{
+ struct amdgpu_gpu_instance *gpu_ins;
+ struct amdgpu_device *adev;
+ int i, ret = 0;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * MGPU fan boost feature should be enabled
+ * only when there are two or more dGPUs in
+ * the system
+ */
+ if (mgpu_info.num_dgpu < 2)
+ goto out;
+
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ gpu_ins = &(mgpu_info.gpu_ins[i]);
+ adev = gpu_ins->adev;
+ if (!(adev->flags & AMD_IS_APU || amdgpu_sriov_multi_vf_mode(adev)) &&
+ !gpu_ins->mgpu_fan_enabled) {
+ ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
+ if (ret)
+ break;
+
+ gpu_ins->mgpu_fan_enabled = 1;
+ }
+ }
+
+out:
+ mutex_unlock(&mgpu_info.mutex);
+
+ return ret;
+}
+
+/**
+ * amdgpu_device_ip_late_init - run late init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Late initialization pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the late_init callbacks are run.
+ * late_init covers any special initialization that an IP requires
+ * after all of the IPs have been initialized or something that needs to happen
+ * late in the init process.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_gpu_instance *gpu_instance;
+ int i = 0, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->late_init) {
+ r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "late_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.late_initialized = true;
+ }
+
+ r = amdgpu_ras_late_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_ras_late_init failed %d", r);
+ return r;
+ }
+
+ if (!amdgpu_reset_in_recovery(adev))
+ amdgpu_ras_set_error_query_ready(adev, true);
+
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
+ amdgpu_device_fill_reset_magic(adev);
+
+ r = amdgpu_device_enable_mgpu_fan_boost();
+ if (r)
+ dev_err(adev->dev, "enable mgpu fan boost failed (%d).\n", r);
+
+ /* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */
+ if (amdgpu_passthrough(adev) &&
+ ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
+ adev->asic_type == CHIP_ALDEBARAN))
+ amdgpu_dpm_handle_passthrough_sbr(adev, true);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * Reset the device p-state to low, as it was booted with high.
+ *
+ * This should be performed only after all devices from the same
+ * hive have been initialized.
+ *
+ * However, the number of devices in a hive is not known in
+ * advance; it is counted one by one as the devices initialize.
+ *
+ * So we wait until all XGMI interlinked devices have been
+ * initialized. This may add some delay, as those devices may come
+ * from different hives, but that should be OK.
+ */
+ if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev->flags & AMD_IS_APU)
+ continue;
+
+ r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
+ AMDGPU_XGMI_PSTATE_MIN);
+ if (r) {
+ dev_err(adev->dev,
+ "pstate setting failed (%d).\n",
+ r);
+ break;
+ }
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+ }
+
+ return 0;
+}
+
+static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ if (!ip_block->version->funcs->hw_fini) {
+ dev_err(adev->dev, "hw_fini of IP block <%s> not defined\n",
+ ip_block->version->funcs->name);
+ } else {
+ r = ip_block->version->funcs->hw_fini(ip_block);
+ /* XXX handle errors */
+ if (r) {
+ dev_dbg(adev->dev,
+ "hw_fini of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ }
+ }
+
+ ip_block->status.hw = false;
+}
+
+/**
+ * amdgpu_device_smu_fini_early - smu hw_fini wrapper
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * For ASICs that need to disable the SMC first
+ */
+static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
+ return;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
+ break;
+ }
+ }
+}
+
+static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].version->funcs->early_fini)
+ continue;
+
+ r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
+ if (r) {
+ dev_dbg(adev->dev,
+ "early_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
+ }
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ amdgpu_amdkfd_suspend(adev, true);
+ amdgpu_userq_suspend(adev);
+
+ /* Workaround for ASICs that need to disable the SMC first */
+ amdgpu_device_smu_fini_early(adev);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_release_full_gpu(adev, false))
+ dev_err(adev->dev,
+ "failed to release exclusive mode on fini\n");
+ }
+
+ /*
+ * Driver reload on the APU can fail due to firmware validation because
+ * the PSP is always running, as it is shared across the whole SoC.
+ * This same issue does not occur on dGPU because it has a mechanism
+ * that checks whether the PSP is running. A solution for those issues
+ * in the APU is to trigger a GPU reset, but this should be done during
+ * the unload phase to avoid adding boot latency and screen flicker.
+ */
+ if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
+ r = amdgpu_asic_reset(adev);
+ if (r)
+ dev_err(adev->dev, "asic reset on %s failed\n", __func__);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run. hw_fini tears down the hardware associated with each IP
+ * and sw_fini tears down any software state associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ amdgpu_cper_fini(adev);
+
+ if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
+ amdgpu_virt_release_ras_err_handler_data(adev);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ amdgpu_xgmi_remove_device(adev);
+
+ amdgpu_amdkfd_device_fini_sw(adev);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ amdgpu_ucode_free_bo(adev);
+ amdgpu_free_static_csa(&adev->virt.csa_obj);
+ amdgpu_device_wb_fini(adev);
+ amdgpu_device_mem_scratch_fini(adev);
+ amdgpu_ib_pool_fini(adev);
+ amdgpu_seq64_fini(adev);
+ amdgpu_doorbell_fini(adev);
+ }
+ if (adev->ip_blocks[i].version->funcs->sw_fini) {
+ r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
+ /* XXX handle errors */
+ if (r) {
+ dev_dbg(adev->dev,
+ "sw_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name,
+ r);
+ }
+ }
+ adev->ip_blocks[i].status.sw = false;
+ adev->ip_blocks[i].status.valid = false;
+ }
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.late_initialized)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->late_fini)
+ adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
+ adev->ip_blocks[i].status.late_initialized = false;
+ }
+
+ amdgpu_ras_fini(adev);
+ amdgpu_uid_fini(adev);
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_delayed_init_work_handler - work handler for IB tests
+ *
+ * @work: work_struct.
+ */
+static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, delayed_init_work.work);
+ int r;
+
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ dev_err(adev->dev, "ib ring test failed (%d).\n", r);
+}
+
+static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
+
+ WARN_ON_ONCE(adev->gfx.gfx_off_state);
+ WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
+
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
+ adev->gfx.gfx_off_state = true;
+}
+
+/**
+ * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
+{
+ int i, r, rec;
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
+ /*
+ * Per the PMFW team's suggestion, the driver needs to handle disabling
+ * the gfxoff and df cstate features for gpu reset (e.g. Mode1Reset)
+ * scenarios. Add the missing df cstate disablement here.
+ */
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+
+ /* displays are handled separately */
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ goto unwind;
+ }
+
+ return 0;
+unwind:
+ rec = amdgpu_device_ip_resume_phase3(adev);
+ if (rec)
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase3 failed during unwind: %d\n",
+ rec);
+
+ amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW);
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+
+ return r;
+}
+
+/**
+ * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
+{
+ int i, r, rec;
+
+ if (adev->in_s0ix)
+ amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ /* displays are handled in phase1 */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+ /* PSP lost connection when err_event_athub occurs */
+ if (amdgpu_ras_intr_triggered() &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+ adev->ip_blocks[i].status.hw = false;
+ continue;
+ }
+
+ /* skip unnecessary suspend if we have not initialized them yet */
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+
+ /* Since we skip suspend for S0i3, we need to cancel the delayed
+ * idle work here as the suspend callback never gets called.
+ */
+ if (adev->in_s0ix &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+ /* skip suspend of gfx/mes and psp for S0ix;
+ * gfx is in the gfxoff state, so on resume it will exit gfxoff just
+ * like at runtime. PSP is also part of the always-on hardware,
+ * so there is no need to suspend it.
+ */
+ if (adev->in_s0ix &&
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
+ continue;
+
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (adev->in_s0ix &&
+ (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0)) &&
+ (adev->ip_blocks[i].version->type ==
+ AMD_IP_BLOCK_TYPE_SDMA))
+ continue;
+
+ /* During cold boot swPSP provides the IMU and RLC FW binaries to the TOS.
+ * These live in the TMR and are expected to be reused by the PSP-TOS to
+ * reload from that location; RLC autoload is also loaded from there, based
+ * on the PMFW -> PSP message during the re-init sequence.
+ * Therefore, psp suspend & resume should be skipped to avoid destroying
+ * the TMR and reloading the FWs again on IMU-enabled APU ASICs.
+ */
+ if (amdgpu_in_reset(adev) &&
+ (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
+ goto unwind;
+
+ /* handle putting the SMC in the appropriate state */
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
+ if (r) {
+ dev_err(adev->dev,
+ "SMC failed to set mp1 state %d, %d\n",
+ adev->mp1_state, r);
+ goto unwind;
+ }
+ }
+ }
+ }
+
+ return 0;
+unwind:
+ /* suspend phase 2 = resume phase 1 + resume phase 2 */
+ rec = amdgpu_device_ip_resume_phase1(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase1 failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ rec = amdgpu_device_fw_loading(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_fw_loading failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ rec = amdgpu_device_ip_resume_phase2(adev);
+ if (rec) {
+ dev_err(adev->dev,
+ "amdgpu_device_ip_resume_phase2 failed during unwind: %d\n",
+ rec);
+ return r;
+ }
+
+ return r;
+}
+
+/**
+ * amdgpu_device_ip_suspend - run suspend for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_request_full_gpu(adev, false);
+ }
+
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
+ r = amdgpu_device_ip_suspend_phase1(adev);
+ if (r)
+ return r;
+ r = amdgpu_device_ip_suspend_phase2(adev);
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
+ return r;
+}
+
+static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ static enum amd_ip_block_type ip_order[] = {
+ AMD_IP_BLOCK_TYPE_COMMON,
+ AMD_IP_BLOCK_TYPE_GMC,
+ AMD_IP_BLOCK_TYPE_PSP,
+ AMD_IP_BLOCK_TYPE_IH,
+ };
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ int j;
+ struct amdgpu_ip_block *block;
+
+ block = &adev->ip_blocks[i];
+ block->status.hw = false;
+
+ for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
+
+ if (block->version->type != ip_order[j] ||
+ !block->status.valid)
+ continue;
+
+ r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-early: %s failed\n",
+ block->version->funcs->name);
+ return r;
+ }
+ block->status.hw = true;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ip_block *block;
+ int i, r = 0;
+
+ static enum amd_ip_block_type ip_order[] = {
+ AMD_IP_BLOCK_TYPE_SMC,
+ AMD_IP_BLOCK_TYPE_DCE,
+ AMD_IP_BLOCK_TYPE_GFX,
+ AMD_IP_BLOCK_TYPE_SDMA,
+ AMD_IP_BLOCK_TYPE_MES,
+ AMD_IP_BLOCK_TYPE_UVD,
+ AMD_IP_BLOCK_TYPE_VCE,
+ AMD_IP_BLOCK_TYPE_VCN,
+ AMD_IP_BLOCK_TYPE_JPEG
+ };
+
+ for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+ block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
+
+ if (!block)
+ continue;
+
+ if (block->status.valid && !block->status.hw) {
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = amdgpu_ip_block_resume(block);
+ } else {
+ r = block->version->funcs->hw_init(block);
+ }
+
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-late: %s failed\n",
+ block->version->funcs->name);
+ break;
+ }
+ block->status.hw = true;
+ }
+ }
+
+ return r;
+}
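Illustrative aside (not part of the patch): the two SR-IOV re-init helpers above use their fixed ip_order arrays differently. The early variant walks every discovered block and uses the array only as a membership filter, while the late variant walks ip_order itself, which is what enforces the bring-up order. A small sketch of the order-enforcing form, with hypothetical block names:

#include <stdio.h>
#include <string.h>

/* Hypothetical blocks in discovery order and a fixed re-init order. */
static const char *const blocks[] = { "vcn", "gfx", "smc", "sdma", "dce" };
static const char *const ip_order[] = { "smc", "dce", "gfx", "sdma", "vcn" };

static int find_block(const char *name)
{
	for (unsigned int i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++)
		if (!strcmp(blocks[i], name))
			return (int)i;
	return -1;
}

int main(void)
{
	/* Walking ip_order, not the block array, is what enforces the order. */
	for (unsigned int j = 0; j < sizeof(ip_order) / sizeof(ip_order[0]); j++) {
		int i = find_block(ip_order[j]);

		if (i >= 0)
			printf("re-init %s\n", blocks[i]);
	}
	return 0;
}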
+
+/**
+ * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * First resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * COMMON, GMC, and IH. resume puts the hardware into a functional state
+ * after a suspend and updates the software state as necessary. This
+ * function is also used for restoring the GPU after a GPU reset.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
+
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Second resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
+ * functional state after a suspend and updates the software state as
+ * necessary. This function is also used for restoring the GPU after a GPU
+ * reset.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Third resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all DCE. resume puts the hardware into a functional state after a suspend
+ * and updates the software state as necessary. This function is also used
+ * for restoring the GPU after a GPU reset.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main resume function for hardware IPs. The hardware IPs
+ * are split into multiple resume functions because they are
+ * also used in recovering from a GPU reset and some additional
+ * steps need to be taken between them. In this case (S3/S4) they are
+ * run sequentially.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_device_ip_resume_phase1(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_device_ip_resume_phase2(adev);
+
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
+ if (r)
+ return r;
+
+ amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_resume_phase3(adev);
+
+ return r;
+}
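Illustrative aside (not part of the patch): amdgpu_device_ip_resume() is essentially a fixed pipeline, namely phase 1, firmware loading, phase 2, fence driver bring-up, then phase 3, stopping at the first failure. A table-driven sketch of that shape, with stub step functions standing in for the real calls:

#include <stdio.h>

typedef int (*step_fn)(void);

static int resume_phase1(void) { puts("phase1: COMMON/GMC/IH (and PSP on SR-IOV)"); return 0; }
static int fw_loading(void)    { puts("load firmware");                             return 0; }
static int resume_phase2(void) { puts("phase2: everything else");                   return 0; }
static int fence_hw_init(void) { puts("fence driver hw init");                      return 0; }
static int resume_phase3(void) { puts("phase3: DCE");                               return 0; }

int main(void)
{
	/* Hypothetical table mirroring the ordering constraints of ip_resume(). */
	step_fn steps[] = {
		resume_phase1, fw_loading, resume_phase2, fence_hw_init, resume_phase3,
	};

	for (unsigned int i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
		int r = steps[i]();

		if (r) {	/* stop at the first failing step */
			fprintf(stderr, "resume step %u failed: %d\n", i, r);
			return r;
		}
	}
	return 0;
}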
+
+/**
+ * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Query the VBIOS data tables to determine if the board supports SR-IOV.
+ */
+static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->is_atom_fw) {
+ if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+ } else {
+ if (amdgpu_atombios_has_gpu_virtualization_table(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
+ }
+
+ if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
+ }
+}
+
+/**
+ * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
+ *
+ * @pdev : pci device context
+ * @asic_type: AMD asic type
+ *
+ * Check if there is DC (new modesetting infrastructure) support for an asic.
+ * Returns true if DC has support, false if not.
+ */
+bool amdgpu_device_asic_has_dc_support(struct pci_dev *pdev,
+ enum amd_asic_type asic_type)
+{
+ switch (asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ case CHIP_TOPAZ:
+ /* chips with no display hardware */
+ return false;
+#if defined(CONFIG_DRM_AMD_DC)
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ return amdgpu_dc != 0 && IS_ENABLED(CONFIG_DRM_AMD_DC_SI);
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ /*
+ * We have systems in the wild with these ASICs that require
+ * TRAVIS and NUTMEG support which is not supported with DC.
+ *
+ * Fall back to the non-DC driver here by default so as not to
+ * cause regressions.
+ */
+ return amdgpu_dc > 0;
+ default:
+ return amdgpu_dc != 0;
+#else
+ default:
+ if (amdgpu_dc > 0)
+ dev_info_once(
+ &pdev->dev,
+ "Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
+ return false;
+#endif
+ }
+}
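Illustrative aside (not part of the patch): amdgpu_dc behaves as a tri-state parameter, where -1 means use the ASIC default, 0 forces DC off, and 1 forces it on, which is why the function tests "dc > 0" for default-off ASICs and "dc != 0" for default-on ones. A tiny sketch of those two predicates:

#include <stdbool.h>
#include <stdio.h>

/*
 * Tri-state module parameter: -1 = auto (ASIC default), 0 = force off,
 * 1 = force on.  "dc != 0" means on unless explicitly disabled;
 * "dc > 0" means off unless explicitly enabled.
 */
static bool dc_default_on(int dc)  { return dc != 0; }
static bool dc_default_off(int dc) { return dc > 0; }

int main(void)
{
	for (int dc = -1; dc <= 1; dc++)
		printf("dc=%2d: default-on asic -> %d, default-off asic -> %d\n",
		       dc, dc_default_on(dc), dc_default_off(dc));
	return 0;
}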
+
+/**
+ * amdgpu_device_has_dc_support - check if dc is supported
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns true for supported, false for not supported
+ */
+bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
+{
+ if (adev->enable_virtual_display ||
+ (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
+ return false;
+
+ return amdgpu_device_asic_has_dc_support(adev->pdev, adev->asic_type);
+}
+
+static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
+{
+ struct amdgpu_device *adev =
+ container_of(__work, struct amdgpu_device, xgmi_reset_work);
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+ /* It's a bug to not have a hive within this function */
+ if (WARN_ON(!hive))
+ return;
+
+ /*
+ * Use task barrier to synchronize all xgmi reset works across the
+ * hive. task_barrier_enter and task_barrier_exit will block
+ * until all the threads running the xgmi reset works reach
+ * those points. task_barrier_full will do both blocks.
+ */
+ if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+
+ task_barrier_enter(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_enter(adev);
+
+ if (adev->asic_reset_res)
+ goto fail;
+
+ task_barrier_exit(&hive->tb);
+ adev->asic_reset_res = amdgpu_device_baco_exit(adev);
+
+ if (adev->asic_reset_res)
+ goto fail;
+
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
+ } else {
+
+ task_barrier_full(&hive->tb);
+ adev->asic_reset_res = amdgpu_asic_reset(adev);
+ }
+
+fail:
+ if (adev->asic_reset_res)
+ dev_warn(adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ adev->asic_reset_res, adev_to_drm(adev)->unique);
+ amdgpu_put_xgmi_hive(hive);
+}
+
+static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+ char *input = amdgpu_lockup_timeout;
+ char *timeout_setting = NULL;
+ int index = 0;
+ long timeout;
+ int ret = 0;
+
+ /* By default the timeout for all queues is 2 seconds */
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = msecs_to_jiffies(2000);
+
+ if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
+ return 0;
+
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
+
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ dev_warn(adev->dev, "lockup timeout disabled");
+ add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
+ } else {
+ timeout = msecs_to_jiffies(timeout);
+ }
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* When only one value is specified, apply it to all queues. */
+ if (index == 1)
+ adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
+ adev->video_timeout = timeout;
+
+ return ret;
+}
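Illustrative aside (not part of the patch): lockup_timeout is parsed as up to four comma-separated millisecond values in gfx, compute, sdma, video order, where 0 keeps the per-queue default, a negative value disables the timeout, and a single value applies to every queue. A userspace sketch of that parsing with strsep()/strtol(), with simplified error handling compared to the kernel's kstrtol():

#define _DEFAULT_SOURCE		/* for strsep() on glibc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	/* Example lockup_timeout value: gfx,compute,sdma,video in milliseconds. */
	char input[] = "10000,60000,0,-1";
	long t[4] = { 2000, 2000, 2000, 2000 };	/* 2 second defaults */
	char *s, *p = input;
	int index = 0;

	while ((s = strsep(&p, ",")) && *s && index < 4) {
		long v = strtol(s, NULL, 0);	/* the driver uses kstrtol() */

		if (v == 0) {			/* 0 keeps the default for that queue */
			index++;
			continue;
		}
		/* a negative value disables the timeout (MAX_SCHEDULE_TIMEOUT there) */
		t[index++] = v < 0 ? -1 : v;
	}

	if (index == 1)				/* a single value applies to every queue */
		t[1] = t[2] = t[3] = t[0];

	printf("gfx=%ld compute=%ld sdma=%ld video=%ld\n", t[0], t[1], t[2], t[3]);
	return 0;
}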
+
+/**
+ * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode.
+ */
+static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
+{
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev(adev->dev);
+ if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
+ adev->ram_is_direct_mapped = true;
+}
+
+#if defined(CONFIG_HSA_AMD_P2P)
+/**
+ * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Return true if the IOMMU is remapping the BAR address.
+ */
+static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
+{
+ struct iommu_domain *domain;
+
+ domain = iommu_get_domain_for_dev(adev->dev);
+ if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
+ domain->type == IOMMU_DOMAIN_DMA_FQ))
+ return true;
+
+ return false;
+}
+#endif
+
+static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
+{
+ if (amdgpu_mcbp == 1)
+ adev->gfx.mcbp = true;
+ else if (amdgpu_mcbp == 0)
+ adev->gfx.mcbp = false;
+
+ if (amdgpu_sriov_vf(adev))
+ adev->gfx.mcbp = true;
+
+ if (adev->gfx.mcbp)
+ dev_info(adev->dev, "MCBP is enabled\n");
+}
+
+static int amdgpu_device_sys_interface_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_atombios_sysfs_init(adev);
+ if (r)
+ drm_err(&adev->ddev,
+ "registering atombios sysfs failed (%d).\n", r);
+
+ r = amdgpu_pm_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "registering pm sysfs failed (%d).\n", r);
+
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r) {
+ adev->ucode_sysfs_en = false;
+ dev_err(adev->dev, "Creating firmware sysfs failed (%d).\n", r);
+ } else {
+ adev->ucode_sysfs_en = true;
+ }
+
+ r = amdgpu_device_attr_sysfs_init(adev);
+ if (r)
+ dev_err(adev->dev, "Could not create amdgpu device attr\n");
+
+ r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
+ if (r)
+ dev_err(adev->dev,
+ "Could not create amdgpu board attributes\n");
+
+ amdgpu_fru_sysfs_init(adev);
+ amdgpu_reg_state_sysfs_init(adev);
+ amdgpu_xcp_sysfs_init(adev);
+
+ return r;
+}
+
+static void amdgpu_device_sys_interface_fini(struct amdgpu_device *adev)
+{
+ if (adev->pm.sysfs_initialized)
+ amdgpu_pm_sysfs_fini(adev);
+ if (adev->ucode_sysfs_en)
+ amdgpu_ucode_sysfs_fini(adev);
+ amdgpu_device_attr_sysfs_fini(adev);
+ amdgpu_fru_sysfs_fini(adev);
+
+ amdgpu_reg_state_sysfs_fini(adev);
+ amdgpu_xcp_sysfs_fini(adev);
+}
+
+/**
+ * amdgpu_device_init - initialize the driver
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: driver flags
+ *
+ * Initializes the driver info and hw (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver startup.
+ */
+int amdgpu_device_init(struct amdgpu_device *adev,
+ uint32_t flags)
+{
+ struct pci_dev *pdev = adev->pdev;
+ int r, i;
+ bool px = false;
+ u32 max_MBps;
+ int tmp;
+
+ adev->shutdown = false;
+ adev->flags = flags;
+
+ if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
+ adev->asic_type = amdgpu_force_asic_type;
+ else
+ adev->asic_type = flags & AMD_ASIC_MASK;
+
+ adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
+ if (amdgpu_emu_mode == 1)
+ adev->usec_timeout *= 10;
+ adev->gmc.gart_size = 512 * 1024 * 1024;
+ adev->accel_working = false;
+ adev->num_rings = 0;
+ RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
+ adev->mman.buffer_funcs = NULL;
+ adev->mman.buffer_funcs_ring = NULL;
+ adev->vm_manager.vm_pte_funcs = NULL;
+ adev->vm_manager.vm_pte_num_scheds = 0;
+ adev->gmc.gmc_funcs = NULL;
+ adev->harvest_ip_mask = 0x0;
+ adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
+ bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ adev->smc_rreg = &amdgpu_invalid_rreg;
+ adev->smc_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg = &amdgpu_invalid_rreg;
+ adev->pcie_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
+ adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
+ adev->pciep_rreg = &amdgpu_invalid_rreg;
+ adev->pciep_wreg = &amdgpu_invalid_wreg;
+ adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
+ adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
+ adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
+ adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
+ adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
+ adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
+ adev->didt_rreg = &amdgpu_invalid_rreg;
+ adev->didt_wreg = &amdgpu_invalid_wreg;
+ adev->gc_cac_rreg = &amdgpu_invalid_rreg;
+ adev->gc_cac_wreg = &amdgpu_invalid_wreg;
+ adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
+ adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
+
+ dev_info(
+ adev->dev,
+ "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
+ amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
+
+ /* mutex initialization is all done here so we
+ * can recall functions without having locking issues
+ */
+ mutex_init(&adev->firmware.mutex);
+ mutex_init(&adev->pm.mutex);
+ mutex_init(&adev->gfx.gpu_clock_mutex);
+ mutex_init(&adev->srbm_mutex);
+ mutex_init(&adev->gfx.pipe_reserve_mutex);
+ mutex_init(&adev->gfx.gfx_off_mutex);
+ mutex_init(&adev->gfx.partition_mutex);
+ mutex_init(&adev->grbm_idx_mutex);
+ mutex_init(&adev->mn_lock);
+ mutex_init(&adev->virt.vf_errors.lock);
+ hash_init(adev->mn_hash);
+ mutex_init(&adev->psp.mutex);
+ mutex_init(&adev->notifier_lock);
+ mutex_init(&adev->pm.stable_pstate_ctx_lock);
+ mutex_init(&adev->benchmark_mutex);
+ mutex_init(&adev->gfx.reset_sem_mutex);
+ /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
+ mutex_init(&adev->enforce_isolation_mutex);
+ for (i = 0; i < MAX_XCP; ++i) {
+ adev->isolation[i].spearhead = dma_fence_get_stub();
+ amdgpu_sync_create(&adev->isolation[i].active);
+ amdgpu_sync_create(&adev->isolation[i].prev);
+ }
+ mutex_init(&adev->gfx.userq_sch_mutex);
+ mutex_init(&adev->gfx.workload_profile_mutex);
+ mutex_init(&adev->vcn.workload_profile_mutex);
+
+ amdgpu_device_init_apu_flags(adev);
+
+ r = amdgpu_device_check_arguments(adev);
+ if (r)
+ return r;
+
+ spin_lock_init(&adev->mmio_idx_lock);
+ spin_lock_init(&adev->smc_idx_lock);
+ spin_lock_init(&adev->pcie_idx_lock);
+ spin_lock_init(&adev->uvd_ctx_idx_lock);
+ spin_lock_init(&adev->didt_idx_lock);
+ spin_lock_init(&adev->gc_cac_idx_lock);
+ spin_lock_init(&adev->se_cac_idx_lock);
+ spin_lock_init(&adev->audio_endpt_idx_lock);
+ spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->virt.rlcg_reg_lock);
+ spin_lock_init(&adev->wb.lock);
+
+ xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
+
+ INIT_LIST_HEAD(&adev->reset_list);
+
+ INIT_LIST_HEAD(&adev->ras_list);
+
+ INIT_LIST_HEAD(&adev->pm.od_kobj_list);
+
+ xa_init(&adev->userq_doorbell_xa);
+
+ INIT_DELAYED_WORK(&adev->delayed_init_work,
+ amdgpu_device_delayed_init_work_handler);
+ INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
+ amdgpu_device_delay_enable_gfx_off);
+ /*
+ * Initialize the enforce_isolation work structures for each XCP
+ * partition. This work handler is responsible for enforcing shader
+ * isolation on AMD GPUs. It counts the number of emitted fences for
+ * each GFX and compute ring. If there are any fences, it schedules
+ * the `enforce_isolation_work` to be run after a delay. If there are
+ * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
+ * runqueue.
+ */
+ for (i = 0; i < MAX_XCP; i++) {
+ INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
+ amdgpu_gfx_enforce_isolation_handler);
+ adev->gfx.enforce_isolation[i].adev = adev;
+ adev->gfx.enforce_isolation[i].xcp_id = i;
+ }
+
+ INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+ INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work);
+
+ adev->gfx.gfx_off_req_count = 1;
+ adev->gfx.gfx_off_residency = 0;
+ adev->gfx.gfx_off_entrycount = 0;
+ adev->pm.ac_power = power_supply_is_system_supplied() > 0;
+
+ atomic_set(&adev->throttling_logging_enabled, 1);
+ /*
+ * If throttling continues, logging will be performed every minute
+ * to avoid log flooding. "-1" is subtracted since the thermal
+ * throttling interrupt comes every second. Thus, the total logging
+ * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
+ * for the throttling interrupt) = 60 seconds.
+ */
+ ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
+
+ ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
+
+ /* Registers mapping */
+ /* TODO: block userspace mapping of io register */
+ if (adev->asic_type >= CHIP_BONAIRE) {
+ adev->rmmio_base = pci_resource_start(adev->pdev, 5);
+ adev->rmmio_size = pci_resource_len(adev->pdev, 5);
+ } else {
+ adev->rmmio_base = pci_resource_start(adev->pdev, 2);
+ adev->rmmio_size = pci_resource_len(adev->pdev, 2);
+ }
+
+ for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
+ atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
+
+ adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
+ if (!adev->rmmio)
+ return -ENOMEM;
+
+ dev_info(adev->dev, "register mmio base: 0x%08X\n",
+ (uint32_t)adev->rmmio_base);
+ dev_info(adev->dev, "register mmio size: %u\n",
+ (unsigned int)adev->rmmio_size);
+
+ /*
+ * The reset domain needs to be present early, before the XGMI hive
+ * (if any) is discovered and initialized, so that the reset sem and
+ * in_gpu_reset flag can be used early during init and before calling
+ * RREG32.
+ */
+ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
+ if (!adev->reset_domain)
+ return -ENOMEM;
+
+ /* detect hw virtualization here */
+ amdgpu_virt_init(adev);
+
+ amdgpu_device_get_pcie_info(adev);
+
+ r = amdgpu_device_get_job_timeout_settings(adev);
+ if (r) {
+ dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+ return r;
+ }
+
+ amdgpu_device_set_mcbp(adev);
+
+ /*
+ * By default, use default mode where all blocks are expected to be
+ * initialized. At present a 'swinit' of blocks is required to be
+ * completed before the need for a different level is detected.
+ */
+ amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
+ /* early init functions */
+ r = amdgpu_device_ip_early_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * No need to remove conflicting FBs for non-display class devices.
+ * This prevents the sysfb from being freed accidentally.
+ */
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ /* Get rid of things like offb */
+ r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
+ if (r)
+ return r;
+ }
+
+ /* Enable TMZ based on IP_VERSION */
+ amdgpu_gmc_tmz_set(adev);
+
+ if (amdgpu_sriov_vf(adev) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+ /* VF MMIO access (except mailbox range) from CPU
+ * will be blocked during sriov runtime
+ */
+ adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
+
+ amdgpu_gmc_noretry_set(adev);
+ /* Need to get xgmi info early to decide the reset behavior */
+ if (adev->gmc.xgmi.supported) {
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+ }
+
+ /* enable PCIE atomic ops */
+ if (amdgpu_sriov_vf(adev)) {
+ if (adev->virt.fw_reserve.p_pf2vf)
+ adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
+ adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
+ (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an
+ * internal path natively supports atomics, so set have_atomics_support
+ * to true.
+ */
+ } else if ((adev->flags & AMD_IS_APU) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >
+ IP_VERSION(9, 0, 0))) {
+ adev->have_atomics_support = true;
+ } else {
+ adev->have_atomics_support =
+ !pci_enable_atomic_ops_to_root(adev->pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ }
+
+ if (!adev->have_atomics_support)
+ dev_info(adev->dev, "PCIE atomic ops are not supported\n");
+
+ /* doorbell bar mapping and doorbell index init*/
+ amdgpu_doorbell_init(adev);
+
+ if (amdgpu_emu_mode == 1) {
+ /* post the asic on emulation mode */
+ emu_soc_asic_init(adev);
+ goto fence_driver_init;
+ }
+
+ amdgpu_reset_init(adev);
+
+ /* detect if we have an SR-IOV vbios */
+ if (adev->bios)
+ amdgpu_device_detect_sriov_bios(adev);
+
+ /* check if we need to reset the asic
+ * E.g., driver was not cleanly unloaded previously, etc.
+ */
+ if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
+ if (adev->gmc.xgmi.num_physical_nodes) {
+ dev_info(adev->dev, "Pending hive reset.\n");
+ amdgpu_set_init_level(adev,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev)) {
+ r = psp_gpu_reset(adev);
+ } else {
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ }
+
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
+ }
+ }
+
+ /* Post card if necessary */
+ if (amdgpu_device_need_post(adev)) {
+ if (!adev->bios) {
+ dev_err(adev->dev, "no vBIOS found\n");
+ r = -EINVAL;
+ goto failed;
+ }
+ dev_info(adev->dev, "GPU posting now...\n");
+ r = amdgpu_device_asic_init(adev);
+ if (r) {
+ dev_err(adev->dev, "gpu post error!\n");
+ goto failed;
+ }
+ }
+
+ if (adev->bios) {
+ if (adev->is_atom_fw) {
+ /* Initialize clocks */
+ r = amdgpu_atomfirmware_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ } else {
+ /* Initialize clocks */
+ r = amdgpu_atombios_get_clock_info(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
+ goto failed;
+ }
+ /* init i2c buses */
+ amdgpu_i2c_init(adev);
+ }
+ }
+
+fence_driver_init:
+ /* Fence driver */
+ r = amdgpu_fence_driver_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
+ goto failed;
+ }
+
+ /* init the mode config */
+ drm_mode_config_init(adev_to_drm(adev));
+
+ r = amdgpu_device_ip_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
+ goto release_ras_con;
+ }
+
+ amdgpu_fence_driver_hw_init(adev);
+
+ dev_info(adev->dev,
+ "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+ adev->gfx.config.max_shader_engines,
+ adev->gfx.config.max_sh_per_se,
+ adev->gfx.config.max_cu_per_sh,
+ adev->gfx.cu_info.number);
+
+ adev->accel_working = true;
+
+ amdgpu_vm_check_compute_bug(adev);
+
+ /* Initialize the buffer migration limit. */
+ if (amdgpu_moverate >= 0)
+ max_MBps = amdgpu_moverate;
+ else
+ max_MBps = 8; /* Allow 8 MB/s. */
+ /* Get a log2 for easy divisions. */
+ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
+
+ /*
+ * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
+ * Otherwise the mgpu fan boost feature will be skipped because the
+ * gpu instance count would be too low.
+ */
+ amdgpu_register_gpu_instance(adev);
+
+ /* enable clockgating, etc. after ib tests, etc. since some blocks require
+ * explicit gating rather than handling it automatically.
+ */
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ r = amdgpu_device_ip_late_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
+ goto release_ras_con;
+ }
+ /* must succeed. */
+ amdgpu_ras_resume(adev);
+ queue_delayed_work(system_wq, &adev->delayed_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_release_full_gpu(adev, true);
+ flush_delayed_work(&adev->delayed_init_work);
+ }
+
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ amdgpu_xgmi_reset_on_init(adev);
+ /*
+ * Register the sysfs interfaces after `late_init`, since some of the
+ * operations performed in `late_init` might affect sysfs interface
+ * creation.
+ */
+ r = amdgpu_device_sys_interface_init(adev);
+
+ if (IS_ENABLED(CONFIG_PERF_EVENTS))
+ r = amdgpu_pmu_init(adev);
+ if (r)
+ dev_err(adev->dev, "amdgpu_pmu_init failed\n");
+
+ /* Keep the cached PCI config space at hand for restore after a sudden PCI error */
+ if (amdgpu_device_cache_pci_state(adev->pdev))
+ pci_restore_state(pdev);
+
+ /* if we have more than one VGA card, disable the amdgpu VGA resources */
+ /* this will fail for cards that aren't VGA class devices, just
+ * ignore it
+ */
+ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
+
+ px = amdgpu_device_supports_px(adev);
+
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
+ apple_gmux_detect(NULL, NULL)))
+ vga_switcheroo_register_client(adev->pdev,
+ &amdgpu_switcheroo_ops, px);
+
+ if (px)
+ vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
+
+ amdgpu_device_check_iommu_direct_map(adev);
+
+ adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
+ r = register_pm_notifier(&adev->pm_nb);
+ if (r)
+ goto failed;
+
+ return 0;
+
+release_ras_con:
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ /* failed in exclusive mode due to timeout */
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_mmio_blocked(adev) &&
+ !amdgpu_virt_wait_reset(adev)) {
+ dev_err(adev->dev, "VF exclusive mode timeout\n");
+ /* Don't send request since VF is inactive. */
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ adev->virt.ops = NULL;
+ r = -EAGAIN;
+ }
+ amdgpu_release_ras_context(adev);
+
+failed:
+ amdgpu_vf_error_trans_all(adev);
+
+ return r;
+}
+
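+/*
+ * amdgpu_device_unmap_mmio - tear down all CPU-visible MMIO mappings
+ *
+ * Clears userspace mappings of the device, tears down the doorbell BAR and
+ * unmaps the register and VRAM apertures. Used when the device is unplugged.
+ */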
+static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
+{
+
+ /* Clear all CPU mappings pointing to this device */
+ unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
+
+ /* Unmap all mapped bars - Doorbell, registers and VRAM */
+ amdgpu_doorbell_fini(adev);
+
+ iounmap(adev->rmmio);
+ adev->rmmio = NULL;
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
+
+ /* Memory manager related */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
+ }
+}
+
+/**
+ * amdgpu_device_fini_hw - tear down the driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the driver info (all asics).
+ * Called at driver shutdown.
+ */
+void amdgpu_device_fini_hw(struct amdgpu_device *adev)
+{
+ dev_info(adev->dev, "amdgpu: finishing device.\n");
+ flush_delayed_work(&adev->delayed_init_work);
+
+ if (adev->mman.initialized)
+ drain_workqueue(adev->mman.bdev.wq);
+ adev->shutdown = true;
+
+ unregister_pm_notifier(&adev->pm_nb);
+
+ /* make sure the IB test has finished before entering exclusive mode
+ * to avoid preempting the IB test
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_request_full_gpu(adev, false);
+ amdgpu_virt_fini_data_exchange(adev);
+ }
+
+ /* disable all interrupts */
+ amdgpu_irq_disable_all(adev);
+ if (adev->mode_info.mode_config_initialized) {
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
+ drm_helper_force_disable_all(adev_to_drm(adev));
+ else
+ drm_atomic_helper_shutdown(adev_to_drm(adev));
+ }
+ amdgpu_fence_driver_hw_fini(adev);
+
+ amdgpu_device_sys_interface_fini(adev);
+
+ /* RAS features must be disabled before hw fini */
+ amdgpu_ras_pre_fini(adev);
+
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
+ amdgpu_device_ip_fini_early(adev);
+
+ amdgpu_irq_fini_hw(adev);
+
+ if (adev->mman.initialized)
+ ttm_device_clear_dma_mappings(&adev->mman.bdev);
+
+ amdgpu_gart_dummy_page_fini(adev);
+
+ if (drm_dev_is_unplugged(adev_to_drm(adev)))
+ amdgpu_device_unmap_mmio(adev);
+
+}
+
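+/**
+ * amdgpu_device_fini_sw - tear down driver software state
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the remaining software state (IP blocks, fence driver, i2c buses,
+ * BIOS data, VGA and switcheroo registration, MMIO mapping) after
+ * amdgpu_device_fini_hw() has quiesced the hardware.
+ */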
+void amdgpu_device_fini_sw(struct amdgpu_device *adev)
+{
+ int i, idx;
+ bool px;
+
+ amdgpu_device_ip_fini(adev);
+ amdgpu_fence_driver_sw_fini(adev);
+ amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
+ adev->accel_working = false;
+ dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+ for (i = 0; i < MAX_XCP; ++i) {
+ dma_fence_put(adev->isolation[i].spearhead);
+ amdgpu_sync_free(&adev->isolation[i].active);
+ amdgpu_sync_free(&adev->isolation[i].prev);
+ }
+
+ amdgpu_reset_fini(adev);
+
+ /* free i2c buses */
+ amdgpu_i2c_fini(adev);
+
+ if (adev->bios) {
+ if (amdgpu_emu_mode != 1)
+ amdgpu_atombios_fini(adev);
+ amdgpu_bios_release(adev);
+ }
+
+ kfree(adev->fru_info);
+ adev->fru_info = NULL;
+
+ kfree(adev->xcp_mgr);
+ adev->xcp_mgr = NULL;
+
+ px = amdgpu_device_supports_px(adev);
+
+ if (px || (!dev_is_removable(&adev->pdev->dev) &&
+ apple_gmux_detect(NULL, NULL)))
+ vga_switcheroo_unregister_client(adev->pdev);
+
+ if (px)
+ vga_switcheroo_fini_domain_pm_ops(adev->dev);
+
+ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ vga_client_unregister(adev->pdev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ iounmap(adev->rmmio);
+ adev->rmmio = NULL;
+ drm_dev_exit(idx);
+ }
+
+ if (IS_ENABLED(CONFIG_PERF_EVENTS))
+ amdgpu_pmu_fini(adev);
+ if (adev->discovery.bin)
+ amdgpu_discovery_fini(adev);
+
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = NULL;
+
+ kfree(adev->pci_state);
+ kfree(adev->pcie_reset_ctx.swds_pcistate);
+ kfree(adev->pcie_reset_ctx.swus_pcistate);
+}
+
+/**
+ * amdgpu_device_evict_resources - evict device resources
+ * @adev: amdgpu device object
+ *
+ * Evicts all ttm device resources (VRAM BOs, GART table) from the lru list
+ * of the vram memory type. Mainly used for evicting device resources
+ * at suspend time.
+ *
+ */
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
+{
+ int ret;
+
+ /* No need to evict vram on APUs unless going to S4 */
+ if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
+ return 0;
+
+ /* No need to evict when going to S5 through S4 callbacks */
+ if (system_state == SYSTEM_POWER_OFF)
+ return 0;
+
+ ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+ if (ret) {
+ dev_warn(adev->dev, "evicting device resources failed\n");
+ return ret;
+ }
+
+ if (adev->in_s4) {
+ ret = ttm_device_prepare_hibernation(&adev->mman.bdev);
+ if (ret)
+ dev_err(adev->dev, "prepare hibernation failed, %d\n", ret);
+ }
+ return ret;
+}
+
+/*
+ * Suspend & resume.
+ */
+/**
+ * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
+ * @nb: notifier block
+ * @mode: suspend mode
+ * @data: data
+ *
+ * This function is called when the system is about to suspend or hibernate.
+ * It is used to set the appropriate flags so that eviction can be optimized
+ * in the pm prepare callback.
+ */
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ adev->in_s4 = true;
+ break;
+ case PM_POST_HIBERNATION:
+ adev->in_s4 = false;
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * amdgpu_device_prepare - prepare for device suspend
+ *
+ * @dev: drm dev pointer
+ *
+ * Prepare to put the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_prepare(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i, r;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ /* Evict the majority of BOs before starting suspend sequence */
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ return r;
+
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_complete - complete power state transition
+ *
+ * @dev: drm dev pointer
+ *
+ * Undo the changes from amdgpu_device_prepare. This will be
+ * called on all resume transitions, including those that failed.
+ */
+void amdgpu_device_complete(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int i;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].version->funcs->complete)
+ continue;
+ adev->ip_blocks[i].version->funcs->complete(&adev->ip_blocks[i]);
+ }
+}
+
+/**
+ * amdgpu_device_suspend - initiate device suspend
+ *
+ * @dev: drm dev pointer
+ * @notify_clients: notify in-kernel DRM clients
+ *
+ * Puts the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r, rec;
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ adev->in_suspend = true;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (!adev->in_runpm)
+ amdgpu_amdkfd_suspend_process(adev);
+ amdgpu_virt_fini_data_exchange(adev);
+ r = amdgpu_virt_request_full_gpu(adev, false);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
+ if (r)
+ goto unwind_sriov;
+
+ if (notify_clients)
+ drm_client_dev_suspend(adev_to_drm(adev));
+
+ cancel_delayed_work_sync(&adev->delayed_init_work);
+
+ amdgpu_ras_suspend(adev);
+
+ r = amdgpu_device_ip_suspend_phase1(adev);
+ if (r)
+ goto unwind_smartshift;
+
+ amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ r = amdgpu_userq_suspend(adev);
+ if (r)
+ goto unwind_ip_phase1;
+
+ r = amdgpu_device_evict_resources(adev);
+ if (r)
+ goto unwind_userq;
+
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
+ amdgpu_fence_driver_hw_fini(adev);
+
+ r = amdgpu_device_ip_suspend_phase2(adev);
+ if (r)
+ goto unwind_evict;
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
+ return 0;
+
+unwind_evict:
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ amdgpu_fence_driver_hw_init(adev);
+
+unwind_userq:
+ rec = amdgpu_userq_resume(adev);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
+ return r;
+ }
+ rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
+ return r;
+ }
+
+unwind_ip_phase1:
+ /* suspend phase 1 = resume phase 3 */
+ rec = amdgpu_device_ip_resume_phase3(adev);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
+ return r;
+ }
+
+unwind_smartshift:
+ rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
+ if (rec) {
+ dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
+ return r;
+ }
+
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev));
+
+ amdgpu_ras_resume(adev);
+
+unwind_sriov:
+ if (amdgpu_sriov_vf(adev)) {
+ rec = amdgpu_virt_request_full_gpu(adev, true);
+ if (rec) {
+ dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
+ return r;
+ }
+ }
+
+ adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
+
+ return r;
+}
+
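+/*
+ * Refresh the XGMI node info and recompute the VRAM base offset after a
+ * virtualized (SR-IOV) guest has been migrated to a different physical node.
+ */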
+static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
+{
+ int r;
+ unsigned int prev_physical_node_id = adev->gmc.xgmi.physical_node_id;
+
+ /* During VM resume, QEMU programming of VF MSIX table (register GFXMSIX_VECT0_ADDR_LO)
+ * may not work. The access could be blocked by nBIF protection as VF isn't in
+ * exclusive access mode. Exclusive access is enabled now, disable/enable MSIX
+ * so that QEMU reprograms MSIX table.
+ */
+ amdgpu_restore_msix(adev);
+
+ r = adev->gfxhub.funcs->get_xgmi_info(adev);
+ if (r)
+ return r;
+
+ dev_info(adev->dev, "xgmi node, old id %d, new id %d\n",
+ prev_physical_node_id, adev->gmc.xgmi.physical_node_id);
+
+ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_resume - initiate device resume
+ *
+ * @dev: drm dev pointer
+ * @notify_clients: notify in-kernel DRM clients
+ *
+ * Bring the hw back to operating state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver resume.
+ */
+int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ r = amdgpu_virt_resume(adev);
+ if (r)
+ goto exit;
+ }
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ if (adev->in_s0ix)
+ amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
+
+ /* post card */
+ if (amdgpu_device_need_post(adev)) {
+ r = amdgpu_device_asic_init(adev);
+ if (r)
+ dev_err(adev->dev, "amdgpu asic init failed\n");
+ }
+
+ r = amdgpu_device_ip_resume(adev);
+
+ if (r) {
+ dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
+ goto exit;
+ }
+
+ r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+ if (r)
+ goto exit;
+
+ r = amdgpu_userq_resume(adev);
+ if (r)
+ goto exit;
+
+ r = amdgpu_device_ip_late_init(adev);
+ if (r)
+ goto exit;
+
+ queue_delayed_work(system_wq, &adev->delayed_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
+exit:
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_data_exchange(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ if (!r && !adev->in_runpm)
+ r = amdgpu_amdkfd_resume_process(adev);
+ }
+
+ if (r)
+ return r;
+
+ /* Make sure IB tests flushed */
+ flush_delayed_work(&adev->delayed_init_work);
+
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev));
+
+ amdgpu_ras_resume(adev);
+
+ if (adev->mode_info.num_crtc) {
+ /*
+ * Most of the connector probing functions try to acquire runtime pm
+ * refs to ensure that the GPU is powered on when connector polling is
+ * performed. Since we're calling this from a runtime PM callback,
+ * trying to acquire rpm refs will cause us to deadlock.
+ *
+ * Since we're guaranteed to be holding the rpm lock, it's safe to
+ * temporarily disable the rpm helpers so this doesn't deadlock us.
+ */
+#ifdef CONFIG_PM
+ dev->dev->power.disable_depth++;
+#endif
+ if (!adev->dc_enabled)
+ drm_helper_hpd_irq_event(dev);
+ else
+ drm_kms_helper_hotplug_event(dev);
+#ifdef CONFIG_PM
+ dev->dev->power.disable_depth--;
+#endif
+ }
+
+ amdgpu_vram_mgr_clear_reset_blocks(adev);
+ adev->in_suspend = false;
+
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
+ dev_warn(adev->dev, "smart shift update failed\n");
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_check_soft_reset - did soft reset succeed
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and
+ * the check_soft_reset callbacks are run. check_soft_reset determines
+ * if the asic is still hung or not.
+ * Returns true if any of the IPs are still in a hung state, false if not.
+ */
+static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
+{
+ int i;
+ bool asic_hang = false;
+
+ if (amdgpu_sriov_vf(adev))
+ return true;
+
+ if (amdgpu_asic_need_full_reset(adev))
+ return true;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].version->funcs->check_soft_reset)
+ adev->ip_blocks[i].status.hang =
+ adev->ip_blocks[i].version->funcs->check_soft_reset(
+ &adev->ip_blocks[i]);
+ if (adev->ip_blocks[i].status.hang) {
+ dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
+ asic_hang = true;
+ }
+ }
+ return asic_hang;
+}
+
+/**
+ * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
+ * handles any IP specific hardware or software state changes that are
+ * necessary for a soft reset to succeed.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].status.hang &&
+ adev->ip_blocks[i].version->funcs->pre_soft_reset) {
+ r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
+ * reset is necessary to recover.
+ * Returns true if a full asic reset is required, false if not.
+ */
+static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (amdgpu_asic_need_full_reset(adev))
+ return true;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+ if (adev->ip_blocks[i].status.hang) {
+ dev_info(adev->dev, "Some block need full reset!\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/**
+ * amdgpu_device_ip_soft_reset - do a soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * soft_reset callbacks are run if the block is hung. soft_reset handles any
+ * IP specific hardware or software state changes that are necessary to soft
+ * reset the IP.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].status.hang &&
+ adev->ip_blocks[i].version->funcs->soft_reset) {
+ r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_post_soft_reset - clean up from soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
+ * handles any IP specific hardware or software state changes that are
+ * necessary after the IP has been soft reset.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (adev->ip_blocks[i].status.hang &&
+ adev->ip_blocks[i].version->funcs->post_soft_reset)
+ r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
+ *
+ * @adev: amdgpu_device pointer
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Do VF FLR and reinitialize the ASIC.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ int r;
+ struct amdgpu_hive_info *hive = NULL;
+
+ if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
+ if (!amdgpu_ras_get_fed_status(adev))
+ amdgpu_virt_ready_to_reset(adev);
+ amdgpu_virt_wait_reset(adev);
+ clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ } else {
+ r = amdgpu_virt_reset_gpu(adev);
+ }
+ if (r)
+ return r;
+
+ amdgpu_ras_clear_err_state(adev);
+ amdgpu_irq_gpu_reset_resume_helper(adev);
+
+ /* some SW cleanup the VF needs to do before recovery */
+ amdgpu_virt_post_reset(adev);
+
+ /* Resume IP prior to SMC */
+ r = amdgpu_device_ip_reinit_early_sriov(adev);
+ if (r)
+ return r;
+
+ amdgpu_virt_init_data_exchange(adev);
+
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ return r;
+
+ /* now we are okay to resume SMC/CP/SDMA */
+ r = amdgpu_device_ip_reinit_late_sriov(adev);
+ if (r)
+ return r;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ /* Update PSP FW topology after reset */
+ if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(hive, adev);
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+ if (r)
+ return r;
+
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ return r;
+
+ if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
+ amdgpu_inc_vram_lost(adev);
+
+ /* This needs to be called during full access, so we can't do it later
+ * like bare metal does.
+ */
+ amdgpu_amdkfd_post_reset(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ /* Aldebaran and gfx_11_0_3 support RAS in SR-IOV, so RAS needs to be resumed during reset */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
+ amdgpu_ras_resume(adev);
+
+ amdgpu_virt_ras_telemetry_post_reset(adev);
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_has_job_running - check if there is any unfinished job
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Check if there is any job running on the device when the guest driver
+ * receives an FLR notification from the host driver. If there are still jobs
+ * running, the guest driver will not respond to the FLR reset. Instead, it
+ * lets the job hit the timeout and then issues the reset request.
+ */
+bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+
+ if (amdgpu_fence_count_emitted(ring))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
+ * a hung GPU.
+ */
+bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
+{
+
+ if (amdgpu_gpu_recovery == 0)
+ goto disabled;
+
+ /* Skip soft reset check in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(adev))
+ return true;
+
+ if (amdgpu_sriov_vf(adev))
+ return true;
+
+ if (amdgpu_gpu_recovery == -1) {
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_VERDE:
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_CYAN_SKILLFISH:
+ goto disabled;
+ default:
+ break;
+ }
+ }
+
+ return true;
+
+disabled:
+ dev_info(adev->dev, "GPU recovery disabled.\n");
+ return false;
+}
+
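+/**
+ * amdgpu_device_mode1_reset - perform a mode1 (whole ASIC) reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Caches the PCI config space, disables bus mastering, triggers the reset
+ * via the SMU when supported (otherwise via the PSP), reloads the PCI state
+ * and waits for the ASIC to come back out of reset.
+ * Returns 0 on success, negative error code on failure.
+ */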
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ dev_info(adev->dev, "GPU mode1 reset\n");
+
+ /* Cache the state before bus master disable. The saved config space
+ * values are used in other cases like restore after mode-2 reset.
+ */
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
+
+ if (ret)
+ goto mode1_reset_failed;
+
+ amdgpu_device_load_pci_state(adev->pdev);
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto mode1_reset_failed;
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ ret = -ETIMEDOUT;
+ goto mode1_reset_failed;
+ }
+
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return 0;
+
+mode1_reset_failed:
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+ return ret;
+}
+
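+/**
+ * amdgpu_device_link_reset - reset the GPU via a PCIe link reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Requests a link reset from the SMU (skipped while a DPC-initiated reset is
+ * in progress) and then waits for the PSP bootloader to come back.
+ * Returns 0 on success, negative error code on failure.
+ */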
+int amdgpu_device_link_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ dev_info(adev->dev, "GPU link reset\n");
+
+ if (!amdgpu_reset_in_dpc(adev))
+ ret = amdgpu_dpm_link_reset(adev);
+
+ if (ret)
+ goto link_reset_failed;
+
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto link_reset_failed;
+
+ return 0;
+
+link_reset_failed:
+ dev_err(adev->dev, "GPU link reset failed\n");
+ return ret;
+}
+
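+/**
+ * amdgpu_device_pre_asic_reset - prepare a device for ASIC reset
+ *
+ * @adev: amdgpu_device pointer
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Force-completes outstanding hardware fences, increases the guilty job's
+ * karma, attempts a soft reset where possible and, when a full reset is
+ * required, suspends the IP blocks and updates the reset context flags.
+ * Returns 0 on success, negative error code on failure.
+ */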
+int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ int i, r = 0;
+ struct amdgpu_job *job = NULL;
+ struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
+ bool need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+ if (reset_context->reset_req_dev == adev)
+ job = reset_context->job;
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_pre_reset(adev);
+
+ amdgpu_fence_driver_isr_toggle(adev, true);
+
+ /* block all schedulers and reset given job's ring */
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+
+ /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+ amdgpu_fence_driver_force_completion(ring);
+ }
+
+ amdgpu_fence_driver_isr_toggle(adev, false);
+
+ if (job && job->vm)
+ drm_sched_increase_karma(&job->base);
+
+ r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
+ /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
+ if (!amdgpu_sriov_vf(adev)) {
+
+ if (!need_full_reset)
+ need_full_reset = amdgpu_device_ip_need_full_reset(adev);
+
+ if (!need_full_reset && amdgpu_gpu_recovery &&
+ amdgpu_device_ip_check_soft_reset(adev)) {
+ amdgpu_device_ip_pre_soft_reset(adev);
+ r = amdgpu_device_ip_soft_reset(adev);
+ amdgpu_device_ip_post_soft_reset(adev);
+ if (r || amdgpu_device_ip_check_soft_reset(adev)) {
+ dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
+ need_full_reset = true;
+ }
+ }
+
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
+ dev_info(tmp_adev->dev, "Dumping IP State\n");
+ /* Trigger ip dump before we reset the asic */
+ for (i = 0; i < tmp_adev->num_ip_blocks; i++)
+ if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
+ tmp_adev->ip_blocks[i].version->funcs
+ ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
+ dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
+ }
+
+ if (need_full_reset)
+ r = amdgpu_device_ip_suspend(adev);
+ if (need_full_reset)
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ else
+ clear_bit(AMDGPU_NEED_FULL_RESET,
+ &reset_context->flags);
+ }
+
+ return r;
+}
+
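+/**
+ * amdgpu_device_reinit_after_reset - reinitialize devices after an ASIC reset
+ *
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Walks the reset device list and, for a full reset, re-posts each ASIC,
+ * resumes the IP blocks in phases, reloads firmware, restores the partition
+ * mode, re-registers the GPU instance and finally runs the IB ring tests.
+ * Returns 0 on success, negative error code on failure.
+ */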
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *device_list_handle;
+ bool full_reset, vram_lost = false;
+ struct amdgpu_device *tmp_adev;
+ int r, init_level;
+
+ device_list_handle = reset_context->reset_device_list;
+
+ if (!device_list_handle)
+ return -EINVAL;
+
+ full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+ /*
+ * If this is a reset on init, use the default init level; otherwise keep
+ * the level as the recovery level.
+ */
+ */
+ if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
+ init_level = AMDGPU_INIT_LEVEL_DEFAULT;
+ else
+ init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
+
+ r = 0;
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ amdgpu_set_init_level(tmp_adev, init_level);
+ if (full_reset) {
+ /* post card */
+ amdgpu_reset_set_dpc_status(tmp_adev, false);
+ amdgpu_ras_clear_err_state(tmp_adev);
+ r = amdgpu_device_asic_init(tmp_adev);
+ if (r) {
+ dev_warn(tmp_adev->dev, "asic atom init failed!");
+ } else {
+ dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
+
+ r = amdgpu_device_ip_resume_phase1(tmp_adev);
+ if (r)
+ goto out;
+
+ vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
+
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
+ amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);
+
+ if (vram_lost) {
+ dev_info(
+ tmp_adev->dev,
+ "VRAM is lost due to GPU reset!\n");
+ amdgpu_inc_vram_lost(tmp_adev);
+ }
+
+ r = amdgpu_device_fw_loading(tmp_adev);
+ if (r)
+ return r;
+
+ r = amdgpu_xcp_restore_partition_mode(
+ tmp_adev->xcp_mgr);
+ if (r)
+ goto out;
+
+ r = amdgpu_device_ip_resume_phase2(tmp_adev);
+ if (r)
+ goto out;
+
+ if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+
+ r = amdgpu_device_ip_resume_phase3(tmp_adev);
+ if (r)
+ goto out;
+
+ if (vram_lost)
+ amdgpu_device_fill_reset_magic(tmp_adev);
+
+ /*
+ * Add this ASIC back as tracked since the reset has already
+ * completed successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ if (!reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ amdgpu_xgmi_add_device(tmp_adev);
+
+ r = amdgpu_device_ip_late_init(tmp_adev);
+ if (r)
+ goto out;
+
+ r = amdgpu_userq_post_reset(tmp_adev, vram_lost);
+ if (r)
+ goto out;
+
+ drm_client_dev_resume(adev_to_drm(tmp_adev));
+
+ /*
+ * The GPU enters a bad state once the number of faulty pages
+ * caught by ECC reaches the threshold, and RAS recovery is
+ * scheduled next. So add a check here to break recovery if the
+ * bad page threshold is indeed exceeded, and remind the user to
+ * retire this GPU or set a bigger bad_page_threshold value the
+ * next time the driver is probed.
+ */
+ if (!amdgpu_ras_is_rma(tmp_adev)) {
+ /* must succeed. */
+ amdgpu_ras_resume(tmp_adev);
+ } else {
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* Update PSP FW topology after reset */
+ if (reset_context->hive &&
+ tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+ r = amdgpu_xgmi_update_topology(
+ reset_context->hive, tmp_adev);
+ }
+ }
+
+out:
+ if (!r) {
+ /* IP init is complete now, set level as default */
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
+ amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+ r = amdgpu_ib_ring_tests(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
+ r = -EAGAIN;
+ goto end;
+ }
+ }
+
+ if (r)
+ tmp_adev->asic_reset_res = r;
+ }
+
+end:
+ return r;
+}
+
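+/**
+ * amdgpu_do_asic_reset - reset every device on the reset list
+ *
+ * @device_list_handle: list of devices to reset
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Tries the dedicated reset handler first and falls back to the default
+ * method: for XGMI hives the per-device resets are run in parallel, otherwise
+ * a single ASIC reset is performed, followed by reinitialization.
+ * Returns 0 on success, negative error code on failure.
+ */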
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ bool need_full_reset, skip_hw_reset;
+ int r = 0;
+
+ /* Try reset handler method first */
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+
+ reset_context->reset_device_list = device_list_handle;
+ r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
+ /* Reset handler not implemented, use the default method */
+ need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+
+ /*
+ * ASIC reset has to be done on all XGMI hive nodes ASAP
+ * to allow proper link negotiation in FW (within 1 sec)
+ */
+ if (!skip_hw_reset && need_full_reset) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!queue_work(system_unbound_wq,
+ &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ } else
+ r = amdgpu_asic_reset(tmp_adev);
+
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ goto out;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, device_list_handle,
+ reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+ }
+
+ if (!r && amdgpu_ras_intr_triggered()) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ amdgpu_ras_reset_error_count(tmp_adev,
+ AMDGPU_RAS_BLOCK__MMHUB);
+ }
+
+ amdgpu_ras_intr_cleared();
+ }
+
+ r = amdgpu_device_reinit_after_reset(reset_context);
+ if (r == -EAGAIN)
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ else
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+out:
+ return r;
+}
+
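+/*
+ * Tell the SMU (MP1) what to expect across the reset: shutdown state for
+ * mode1/link resets, reset state for mode2, none otherwise.
+ */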
+static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
+{
+
+ switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_MODE1:
+ case AMD_RESET_METHOD_LINK:
+ adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
+ break;
+ case AMD_RESET_METHOD_MODE2:
+ adev->mp1_state = PP_MP1_STATE_RESET;
+ break;
+ default:
+ adev->mp1_state = PP_MP1_STATE_NONE;
+ break;
+ }
+}
+
+static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
+{
+ amdgpu_vf_error_trans_all(adev);
+ adev->mp1_state = PP_MP1_STATE_NONE;
+}
+
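+/*
+ * Re-enable runtime PM for the GPU's HDA audio function (function 1 on the
+ * same bus) and resume it once the reset has completed.
+ */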
+static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
+{
+ struct pci_dev *p = NULL;
+
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, 1);
+ if (p) {
+ pm_runtime_enable(&(p->dev));
+ pm_runtime_resume(&(p->dev));
+ }
+
+ pci_dev_put(p);
+}
+
+static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+{
+ enum amd_reset_method reset_method;
+ struct pci_dev *p = NULL;
+ u64 expires;
+
+ /*
+ * For now, only BACO and mode1 reset are confirmed to suffer
+ * from the audio issue if the audio device is not properly suspended.
+ */
+ reset_method = amdgpu_asic_reset_method(adev);
+ if ((reset_method != AMD_RESET_METHOD_BACO) &&
+ (reset_method != AMD_RESET_METHOD_MODE1))
+ return -EINVAL;
+
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, 1);
+ if (!p)
+ return -ENODEV;
+
+ expires = pm_runtime_autosuspend_expiration(&(p->dev));
+ if (!expires)
+ /*
+ * If we cannot get the audio device autosuspend delay,
+ * a fixed 4s interval is used. Since 3s is the audio
+ * controller's default autosuspend delay setting, the
+ * 4s used here is guaranteed to cover it.
+ */
+ expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
+
+ while (!pm_runtime_status_suspended(&(p->dev))) {
+ if (!pm_runtime_suspend(&(p->dev)))
+ break;
+
+ if (expires < ktime_get_mono_fast_ns()) {
+ dev_warn(adev->dev, "failed to suspend display audio\n");
+ pci_dev_put(p);
+ /* TODO: abort the succeeding gpu reset? */
+ return -ETIMEDOUT;
+ }
+ }
+
+ pm_runtime_disable(&(p->dev));
+
+ pci_dev_put(p);
+ return 0;
+}
+
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+#if defined(CONFIG_DEBUG_FS)
+ if (!amdgpu_sriov_vf(adev))
+ cancel_work(&adev->reset_work);
+#endif
+ cancel_work(&adev->userq_reset_work);
+
+ if (adev->kfd.dev)
+ cancel_work(&adev->kfd.reset_work);
+
+ if (amdgpu_sriov_vf(adev))
+ cancel_work(&adev->virt.flr_work);
+
+ if (con && adev->ras_enabled)
+ cancel_work(&con->recovery_work);
+
+}
+
+static int amdgpu_device_health_check(struct list_head *device_list_handle)
+{
+ struct amdgpu_device *tmp_adev;
+ int ret = 0;
+
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ ret |= amdgpu_device_bus_status_check(tmp_adev);
+ }
+
+ return ret;
+}
+
+static void amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ /*
+ * Build list of devices to reset.
+ * In case we are in XGMI hive mode, resort the device list
+ * to put adev in the 1st position.
+ */
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ list_add_tail(&tmp_adev->reset_list, device_list);
+ if (adev->shutdown)
+ tmp_adev->shutdown = true;
+ if (amdgpu_reset_in_dpc(adev))
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
+ }
+ if (!list_is_first(&adev->reset_list, device_list))
+ list_rotate_to_front(&adev->reset_list, device_list);
+ } else {
+ list_add_tail(&adev->reset_list, device_list);
+ }
+}
+
+static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+}
+
+static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+}
+
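+/*
+ * Quiesce every device on the reset list before the reset: suspend the
+ * display audio, cancel delayed init work, notify kfd, unregister the GPU
+ * instance, suspend DRM clients and RAS, and stop all ring schedulers.
+ */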
+static void amdgpu_device_halt_activities(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i;
+
+ /* block all schedulers and reset given job's ring */
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ amdgpu_device_set_mp1_state(tmp_adev);
+
+ /*
+ * Try to put the audio codec into the suspend state
+ * before the gpu reset starts.
+ *
+ * The power domain of the graphics device is shared
+ * with the AZ power domain. Without this, we may
+ * change the audio hardware behind the audio driver's
+ * back, which would trigger audio codec errors.
+ */
+ if (!amdgpu_device_suspend_display_audio(tmp_adev))
+ tmp_adev->pcie_reset_ctx.audio_suspended = true;
+
+ amdgpu_ras_set_error_query_ready(tmp_adev, false);
+
+ cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
+
+ amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
+
+ /*
+ * Mark these ASICs to be reset as untracked first,
+ * and add them back after the reset has completed.
+ */
+ amdgpu_unregister_gpu_instance(tmp_adev);
+
+ drm_client_dev_suspend(adev_to_drm(tmp_adev));
+
+ /* disable ras on ALL IPs */
+ if (!need_emergency_restart && !amdgpu_reset_in_dpc(adev) &&
+ amdgpu_device_ip_need_full_reset(tmp_adev))
+ amdgpu_ras_suspend(tmp_adev);
+
+ amdgpu_userq_pre_reset(tmp_adev);
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+
+ drm_sched_stop(&ring->sched, job ? &job->base : NULL);
+
+ if (need_emergency_restart)
+ amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
+ }
+ atomic_inc(&tmp_adev->gpu_reset_counter);
+ }
+}
+
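+/*
+ * Run the pre-ASIC-reset step on every device on the list, then perform the
+ * actual reset: a VF FLR based reset (with retries) under SR-IOV, or the
+ * bare-metal reset path otherwise. Pending non-scheduler resets queued before
+ * this point are dropped afterwards.
+ */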
+static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
+ int r = 0;
+
+retry: /* Rest of adevs pre asic reset from XGMI hive. */
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
+ /*TODO Should we stop ?*/
+ if (r) {
+ dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
+ r, adev_to_drm(tmp_adev)->unique);
+ tmp_adev->asic_reset_res = r;
+ }
+ }
+
+ /* Actual ASIC resets if needed. */
+ /* Host driver will handle XGMI hive reset for SRIOV */
+ if (amdgpu_sriov_vf(adev)) {
+
+ /* Bail out of reset early */
+ if (amdgpu_ras_is_rma(adev))
+ return -ENODEV;
+
+ if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
+ dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
+ amdgpu_ras_set_fed(adev, true);
+ set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
+ }
+
+ r = amdgpu_device_reset_sriov(adev, reset_context);
+ if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
+ amdgpu_virt_release_full_gpu(adev, true);
+ goto retry;
+ }
+ if (r)
+ adev->asic_reset_res = r;
+ } else {
+ r = amdgpu_do_asic_reset(device_list, reset_context);
+ if (r && r == -EAGAIN)
+ goto retry;
+ }
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ /*
+ * Drop any pending non-scheduler resets queued before the reset is done.
+ * Any reset scheduled after this point would be valid. Scheduler resets
+ * were already dropped during drm_sched_stop and no new ones can come
+ * in before drm_sched_start.
+ */
+ amdgpu_device_stop_pending_resets(tmp_adev);
+ }
+
+ return r;
+}
+
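+/*
+ * Restart the DRM schedulers on every device after the reset and report the
+ * per-device reset result to the log and the VF error mechanism.
+ */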
+static int amdgpu_device_sched_resume(struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context,
+ bool job_signaled)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i, r = 0;
+
+ /* Post ASIC reset for all devs. */
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+ if (!amdgpu_ring_sched_ready(ring))
+ continue;
+
+ drm_sched_start(&ring->sched, 0);
+ }
+
+ if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
+ drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
+
+ if (tmp_adev->asic_reset_res) {
+ /* Bad news, how do we tell this to userspace?
+ * For a RAS error, we should report GPU bad status instead of
+ * reset failure.
+ */
+ if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
+ !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
+ dev_info(
+ tmp_adev->dev,
+ "GPU reset(%d) failed with error %d \n",
+ atomic_read(
+ &tmp_adev->gpu_reset_counter),
+ tmp_adev->asic_reset_res);
+ amdgpu_vf_error_put(tmp_adev,
+ AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
+ tmp_adev->asic_reset_res);
+ if (!r)
+ r = tmp_adev->asic_reset_res;
+ tmp_adev->asic_reset_res = 0;
+ } else {
+ dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
+ atomic_read(&tmp_adev->gpu_reset_counter));
+ if (amdgpu_acpi_smart_shift_update(tmp_adev,
+ AMDGPU_SS_DEV_D0))
+ dev_warn(tmp_adev->dev,
+ "smart shift update failed\n");
+ }
+ }
+
+ return r;
+}
+
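+/*
+ * Final post-reset steps: unlock kfd (bringing it up if it was never
+ * initialized), resume the display audio codec if it was suspended for the
+ * reset, and restore the MP1 and RAS error query state.
+ */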
+static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ /* unlock kfd: SRIOV would do it separately */
+ if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
+ amdgpu_amdkfd_post_reset(tmp_adev);
+
+ /* kfd_post_reset will do nothing if the kfd device is not initialized;
+ * bring up kfd here if it wasn't initialized before
+ */
+ if (!adev->kfd.init_complete)
+ amdgpu_amdkfd_device_init(adev);
+
+ if (tmp_adev->pcie_reset_ctx.audio_suspended)
+ amdgpu_device_resume_display_audio(tmp_adev);
+
+ amdgpu_device_unset_mp1_state(tmp_adev);
+
+ amdgpu_ras_set_error_query_ready(tmp_adev, true);
+
+ }
+}
+
+
+/**
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
+ *
+ * @adev: amdgpu_device pointer
+ * @job: the job that triggered the hang
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Attempt to reset the GPU if it has hung (all asics).
+ * Attempt to do a soft reset or full reset and reinitialize the ASIC.
+ * Returns 0 for success or an error on failure.
+ */
+
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head device_list;
+ bool job_signaled = false;
+ struct amdgpu_hive_info *hive = NULL;
+ int r = 0;
+ bool need_emergency_restart = false;
+
+ /*
+ * If it reaches here because of hang/timeout and a RAS error is
+ * detected at the same time, let RAS recovery take care of it.
+ */
+ if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
+ !amdgpu_sriov_vf(adev) &&
+ reset_context->src != AMDGPU_RESET_SRC_RAS) {
+ dev_dbg(adev->dev,
+ "Gpu recovery from source: %d yielding to RAS error recovery handling",
+ reset_context->src);
+ return 0;
+ }
+
+ /*
+ * Special case: RAS triggered and full reset isn't supported
+ */
+ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
+ /*
+ * Flush RAM to disk so that after reboot
+ * the user can read the log and see why the system rebooted.
+ */
+ if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
+ amdgpu_ras_get_context(adev)->reboot) {
+ dev_warn(adev->dev, "Emergency reboot.");
+
+ ksys_sync_helper();
+ emergency_restart();
+ }
+
+ dev_info(adev->dev, "GPU %s begin!. Source: %d\n",
+ need_emergency_restart ? "jobs stop" : "reset",
+ reset_context->src);
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+
+ reset_context->job = job;
+ reset_context->hive = hive;
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ r = amdgpu_device_health_check(&device_list);
+ if (r)
+ goto end_reset;
+ }
+
+ /* Cannot be called after locking reset domain */
+ amdgpu_ras_pre_reset(adev, &device_list);
+
+ /* We need to lock reset domain only once both for XGMI and single device */
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+
+ amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
+ hive, need_emergency_restart);
+ if (need_emergency_restart)
+ goto skip_sched_resume;
+ /*
+ * Must check guilty signal here since after this point all old
+ * HW fences are force signaled.
+ *
+ * job->base holds a reference to parent fence
+ */
+ if (job && dma_fence_is_signaled(&job->hw_fence->base)) {
+ job_signaled = true;
+ dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+ goto skip_hw_reset;
+ }
+
+ r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
+ if (r)
+ goto reset_unlock;
+skip_hw_reset:
+ r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
+ if (r)
+ goto reset_unlock;
+skip_sched_resume:
+ amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
+reset_unlock:
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ amdgpu_ras_post_reset(adev, &device_list);
+end_reset:
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ if (r)
+ dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
+
+ atomic_set(&adev->reset_domain->reset_res, r);
+
+ if (!r) {
+ struct amdgpu_task_info *ti = NULL;
+
+ if (job)
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
+
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE,
+ ti ? &ti->task : NULL);
+
+ amdgpu_vm_put_task_info(ti);
+ }
+
+ return r;
+}
+
+/**
+ * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * first physical partner to an AMD dGPU.
+ * This will exclude any virtual switches and links.
+ */
+static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *parent = adev->pdev;
+
+ if (!speed || !width)
+ return;
+
+ *speed = PCI_SPEED_UNKNOWN;
+ *width = PCIE_LNK_WIDTH_UNKNOWN;
+
+ if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
+ while ((parent = pci_upstream_bridge(parent))) {
+ /* skip upstream/downstream switches internal to dGPU */
+ if (parent->vendor == PCI_VENDOR_ID_ATI)
+ continue;
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ break;
+ }
+ } else {
+ /* use the current speeds rather than max if switching is not supported */
+ pcie_bandwidth_available(adev->pdev, NULL, speed, width);
+ }
+}
+
+/**
+ * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * AMD dGPU which may be a virtual upstream bridge.
+ */
+static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *parent = adev->pdev;
+
+ if (!speed || !width)
+ return;
+
+ parent = pci_upstream_bridge(parent);
+ if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
+ /* use the upstream/downstream switches internal to dGPU */
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ while ((parent = pci_upstream_bridge(parent))) {
+ if (parent->vendor == PCI_VENDOR_ID_ATI) {
+ /* use the upstream/downstream switches internal to dGPU */
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ }
+ }
+ } else {
+ /* use the device itself */
+ *speed = pcie_get_speed_cap(adev->pdev);
+ *width = pcie_get_width_cap(adev->pdev);
+ }
+}
+
+/**
+ * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
+ * and lanes) of the slot the device is in. Handles APUs and
+ * virtualized environments where PCIE config space may not be available.
+ */
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
+{
+ enum pci_bus_speed speed_cap, platform_speed_cap;
+ enum pcie_link_width platform_link_width, link_width;
+
+ if (amdgpu_pcie_gen_cap)
+ adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
+
+ if (amdgpu_pcie_lane_cap)
+ adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
+
+ /* covers APUs as well */
+ if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
+ if (adev->pm.pcie_gen_mask == 0)
+ adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+ if (adev->pm.pcie_mlw_mask == 0)
+ adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
+ return;
+ }
+
+ if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
+ return;
+
+ amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
+ &platform_link_width);
+ amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
+
+ if (adev->pm.pcie_gen_mask == 0) {
+ /* asic caps */
+ if (speed_cap == PCI_SPEED_UNKNOWN) {
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ } else {
+ if (speed_cap == PCIE_SPEED_32_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
+ else if (speed_cap == PCIE_SPEED_16_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
+ else if (speed_cap == PCIE_SPEED_8_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ else if (speed_cap == PCIE_SPEED_5_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ else
+ adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
+ }
+ /* platform caps */
+ if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ } else {
+ if (platform_speed_cap == PCIE_SPEED_32_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
+ else if (platform_speed_cap == PCIE_SPEED_16_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
+ else if (platform_speed_cap == PCIE_SPEED_8_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ else if (platform_speed_cap == PCIE_SPEED_5_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ else
+ adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
+
+ }
+ }
+ if (adev->pm.pcie_mlw_mask == 0) {
+ /* asic caps */
+ if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
+ } else {
+ switch (link_width) {
+ case PCIE_LNK_X32:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X16:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X12:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X8:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X4:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X2:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X1:
+ adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
+ break;
+ default:
+ break;
+ }
+ }
+ /* platform caps */
+ if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
+ } else {
+ switch (platform_link_width) {
+ case PCIE_LNK_X32:
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X16:
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X12:
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X8:
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X4:
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X2:
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X1:
+ adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
+ *
+ * Return true if @peer_adev can access (DMA) @adev through the PCIe
+ * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
+ * @peer_adev.
+ */
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+#ifdef CONFIG_HSA_AMD_P2P
+ bool p2p_access =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
+ if (!p2p_access)
+ dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
+ pci_name(peer_adev->pdev));
+
+ bool is_large_bar = adev->gmc.visible_vram_size &&
+ adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
+ bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
+
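+ /* Without IOMMU remapping on the peer, the whole visible VRAM aperture must fit within the peer's DMA mask. */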
+ if (!p2p_addressable) {
+ uint64_t address_mask = peer_adev->dev->dma_mask ?
+ ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
+ resource_size_t aper_limit =
+ adev->gmc.aper_base + adev->gmc.aper_size - 1;
+
+ p2p_addressable = !(adev->gmc.aper_base & address_mask ||
+ aper_limit & address_mask);
+ }
+ return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
+#else
+ return false;
+#endif
+}
+
+int amdgpu_device_baco_enter(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_device_supports_baco(adev))
+ return -ENOTSUPP;
+
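+ /* With RAS enabled, keep doorbell interrupts quiet while the device is in BACO; they are restored on exit. */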
+ if (ras && adev->ras_enabled &&
+ adev->nbio.funcs->enable_doorbell_interrupt)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+ return amdgpu_dpm_baco_enter(adev);
+}
+
+int amdgpu_device_baco_exit(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (!amdgpu_device_supports_baco(adev))
+ return -ENOTSUPP;
+
+ ret = amdgpu_dpm_baco_exit(adev);
+ if (ret)
+ return ret;
+
+ if (ras && adev->ras_enabled &&
+ adev->nbio.funcs->enable_doorbell_interrupt)
+ adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
+ if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
+ adev->nbio.funcs->clear_doorbell_interrupt)
+ adev->nbio.funcs->clear_doorbell_interrupt(adev);
+
+ return 0;
+}
+
+/**
+ * amdgpu_pci_error_detected - Called when a PCI error is detected.
+ * @pdev: PCI device struct
+ * @state: PCI channel state
+ *
+ * Description: Called when a PCI error is detected.
+ *
+ * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
+ */
+pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
+ amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_reset_context reset_context;
+ struct list_head device_list;
+
+ dev_info(adev->dev, "PCI error: detected callback!!\n");
+
+ adev->pci_channel_state = state;
+
+ switch (state) {
+ case pci_channel_io_normal:
+ dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ /* Fatal error, prepare for slot reset */
+ dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
+ if (hive) {
+ /* Devices in a hive are expected to support FW-based
+ * link reset for the other devices; if not, bail out.
+ */
+ if (!amdgpu_dpm_is_link_reset_supported(adev)) {
+ dev_warn(adev->dev,
+ "No support for XGMI hive yet...\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ /* Set the DPC status only if the device is part of a hive.
+ * Non-hive devices should be able to recover after a
+ * link reset.
+ */
+ amdgpu_reset_set_dpc_status(adev, true);
+
+ mutex_lock(&hive->hive_lock);
+ }
+ memset(&reset_context, 0, sizeof(reset_context));
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+ amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
+ hive, false);
+ if (hive)
+ mutex_unlock(&hive->hive_lock);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ /* Permanent error, prepare for device removal */
+ dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
+ * @pdev: pointer to PCI device
+ */
+pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
+
+ /* TODO - dump whatever for debugging purposes */
+
+ /* This is called only if amdgpu_pci_error_detected returns
+ * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
+ * works, so there is no need to reset the slot.
+ */
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
+ * @pdev: PCI device struct
+ *
+ * Description: This routine is called by the pci error recovery
+ * code after the PCI slot has been reset, just before we
+ * should resume normal operations.
+ */
+pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_reset_context reset_context;
+ struct amdgpu_device *tmp_adev;
+ struct amdgpu_hive_info *hive;
+ struct list_head device_list;
+ struct pci_dev *link_dev;
+ int r = 0, i, timeout;
+ u32 memsize;
+ u16 status;
+
+ dev_info(adev->dev, "PCI error: slot reset callback!!\n");
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
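+ /* If a switch upstream port was saved, poll that port's config space; otherwise poll the GPU itself. */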
+ if (adev->pcie_reset_ctx.swus)
+ link_dev = adev->pcie_reset_ctx.swus;
+ else
+ link_dev = adev->pdev;
+ /* wait for asic to come out of reset, timeout = 10s */
+ timeout = 10000;
+ do {
+ usleep_range(10000, 10500);
+ r = pci_read_config_word(link_dev, PCI_VENDOR_ID, &status);
+ timeout -= 10;
+ } while (timeout > 0 && (status != PCI_VENDOR_ID_ATI) &&
+ (status != PCI_VENDOR_ID_AMD));
+
+ if ((status != PCI_VENDOR_ID_ATI) && (status != PCI_VENDOR_ID_AMD)) {
+ r = -ETIME;
+ goto out;
+ }
+
+ amdgpu_device_load_switch_state(adev);
+ /* Restore PCI confspace */
+ amdgpu_device_load_pci_state(pdev);
+
+ /* confirm ASIC came out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ memsize = amdgpu_asic_get_config_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+ if (memsize == 0xffffffff) {
+ r = -ETIME;
+ goto out;
+ }
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+ INIT_LIST_HEAD(&device_list);
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ reset_context.hive = hive;
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else {
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ list_add_tail(&adev->reset_list, &device_list);
+ }
+
+ r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
+out:
+ if (!r) {
+ if (amdgpu_device_cache_pci_state(adev->pdev))
+ pci_restore_state(adev->pdev);
+ dev_info(adev->dev, "PCIe error recovery succeeded\n");
+ } else {
+ dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
+ if (hive) {
+ list_for_each_entry(tmp_adev, &device_list, reset_list)
+ amdgpu_device_unset_mp1_state(tmp_adev);
+ }
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ }
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * amdgpu_pci_resume() - resume normal ops after PCI reset
+ * @pdev: pointer to PCI device
+ *
+ * Called when the error recovery driver tells us that it's
+ * OK to resume normal operation.
+ */
+void amdgpu_pci_resume(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct list_head device_list;
+ struct amdgpu_hive_info *hive = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
+
+ dev_info(adev->dev, "PCI error: resume callback!!\n");
+
+ /* Only continue execution for the case of pci_channel_io_frozen */
+ if (adev->pci_channel_state != pci_channel_io_frozen)
+ return;
+
+ INIT_LIST_HEAD(&device_list);
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = false;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else
+ list_add_tail(&adev->reset_list, &device_list);
+
+ amdgpu_device_sched_resume(&device_list, NULL, NULL);
+ amdgpu_device_gpu_resume(adev, &device_list, false);
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+}
+
+static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
+{
+ struct pci_dev *swus, *swds;
+ int r;
+
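+ /* Walk up from the GPU: its parent should be an ATI switch downstream port (SWDS), whose parent is the switch upstream port (SWUS). */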
+ swds = pci_upstream_bridge(adev->pdev);
+ if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
+ pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
+ return;
+ swus = pci_upstream_bridge(swds);
+ if (!swus ||
+ (swus->vendor != PCI_VENDOR_ID_ATI &&
+ swus->vendor != PCI_VENDOR_ID_AMD) ||
+ pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
+ return;
+
+ /* If already saved, return */
+ if (adev->pcie_reset_ctx.swus)
+ return;
+ /* Upstream bridge is ATI, assume it's SWUS/DS architecture */
+ r = pci_save_state(swds);
+ if (r)
+ return;
+ adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
+
+ r = pci_save_state(swus);
+ if (r)
+ return;
+ adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
+
+ adev->pcie_reset_ctx.swus = swus;
+}
+
+static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev;
+ int r;
+
+ if (!adev->pcie_reset_ctx.swds_pcistate ||
+ !adev->pcie_reset_ctx.swus_pcistate)
+ return;
+
+ pdev = adev->pcie_reset_ctx.swus;
+ r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swus_pcistate);
+ if (!r) {
+ pci_restore_state(pdev);
+ } else {
+ dev_warn(adev->dev, "Failed to load SWUS state, err:%d\n", r);
+ return;
+ }
+
+ pdev = pci_upstream_bridge(adev->pdev);
+ r = pci_load_saved_state(pdev, adev->pcie_reset_ctx.swds_pcistate);
+ if (!r)
+ pci_restore_state(pdev);
+ else
+ dev_warn(adev->dev, "Failed to load SWDS state, err:%d\n", r);
+}
+
+bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
+ r = pci_save_state(pdev);
+ if (!r) {
+ kfree(adev->pci_state);
+
+ adev->pci_state = pci_store_saved_state(pdev);
+
+ if (!adev->pci_state) {
+ dev_err(adev->dev, "Failed to store PCI saved state");
+ return false;
+ }
+ } else {
+ dev_warn(adev->dev, "Failed to save PCI state, err:%d\n", r);
+ return false;
+ }
+
+ amdgpu_device_cache_switch_state(adev);
+
+ return true;
+}
+
+bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ if (!adev->pci_state)
+ return false;
+
+ r = pci_load_saved_state(pdev, adev->pci_state);
+
+ if (!r) {
+ pci_restore_state(pdev);
+ } else {
+ dev_warn(adev->dev, "Failed to load PCI state, err:%d\n", r);
+ return false;
+ }
+
+ return true;
+}
+
+void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
+ return;
+#endif
+ if (adev->gmc.xgmi.connected_to_cpu)
+ return;
+
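+ /* Prefer emitting the flush on the ring; under SR-IOV runtime fall back to the KIQ, and finally to a direct HDP flush. */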
+ if (ring && ring->funcs->emit_hdp_flush) {
+ amdgpu_ring_emit_hdp_flush(ring);
+ return;
+ }
+
+ if (!ring && amdgpu_sriov_runtime(adev)) {
+ if (!amdgpu_kiq_hdp_flush(adev))
+ return;
+ }
+
+ amdgpu_hdp_flush(adev, ring);
+}
+
+void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
+ return;
+#endif
+ if (adev->gmc.xgmi.connected_to_cpu)
+ return;
+
+ amdgpu_hdp_invalidate(adev, ring);
+}
+
+int amdgpu_in_reset(struct amdgpu_device *adev)
+{
+ return atomic_read(&adev->reset_domain->in_gpu_reset);
+}
+
+/**
+ * amdgpu_device_halt() - bring hardware to some kind of halt state
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Bring hardware to some kind of halt state so that no one can touch it
+ * any more. It helps to maintain the error context when an error occurs.
+ * Compared to a simple hang, the system will stay stable at least for SSH
+ * access. Then it should be trivial to inspect the hardware state and
+ * see what's going on. Implemented as follows:
+ *
+ * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
+ * clears all CPU mappings to the device, disallows remappings through page faults
+ * 2. amdgpu_irq_disable_all() disables all interrupts
+ * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
+ * 4. set adev->no_hw_access to avoid potential crashes after step 5
+ * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
+ * 6. pci_disable_device() and pci_wait_for_pending_transaction()
+ * flush any in flight DMA operations
+ */
+void amdgpu_device_halt(struct amdgpu_device *adev)
+{
+ struct pci_dev *pdev = adev->pdev;
+ struct drm_device *ddev = adev_to_drm(adev);
+
+ amdgpu_xcp_dev_unplug(adev);
+ drm_dev_unplug(ddev);
+
+ amdgpu_irq_disable_all(adev);
+
+ amdgpu_fence_driver_hw_fini(adev);
+
+ adev->no_hw_access = true;
+
+ amdgpu_device_unmap_mmio(adev);
+
+ pci_disable_device(pdev);
+ pci_wait_for_pending_transaction(pdev);
+}
+
+u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
+ u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
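+ /* Indirect access: program the index register, read it back to post the write, then access the data register. */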
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
+ u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg * 4);
+ (void)RREG32(address);
+ WREG32(data, v);
+ (void)RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/**
+ * amdgpu_device_get_gang - return a reference to the current gang
+ * @adev: amdgpu_device pointer
+ *
+ * Returns: A new reference to the current gang leader.
+ */
+struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
+{
+ struct dma_fence *fence;
+
+ rcu_read_lock();
+ fence = dma_fence_get_rcu_safe(&adev->gang_submit);
+ rcu_read_unlock();
+ return fence;
+}
+
+/**
+ * amdgpu_device_switch_gang - switch to a new gang
+ * @adev: amdgpu_device pointer
+ * @gang: the gang to switch to
+ *
+ * Try to switch to a new gang.
+ * Returns: NULL if we switched to the new gang or a reference to the current
+ * gang leader.
+ */
+struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
+ struct dma_fence *gang)
+{
+ struct dma_fence *old = NULL;
+
+ dma_fence_get(gang);
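+ /* Atomically install the new gang leader; retry if another thread races in, and return the current leader if it has not signaled yet. */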
+ do {
+ dma_fence_put(old);
+ old = amdgpu_device_get_gang(adev);
+ if (old == gang)
+ break;
+
+ if (!dma_fence_is_signaled(old)) {
+ dma_fence_put(gang);
+ return old;
+ }
+
+ } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
+ old, gang) != old);
+
+ /*
+ * Drop it once for the exchanged reference in adev and once for the
+ * thread local reference acquired in amdgpu_device_get_gang().
+ */
+ dma_fence_put(old);
+ dma_fence_put(old);
+ return NULL;
+}
+
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ struct drm_sched_fence *f = job->base.s_fence;
+ struct dma_fence *dep;
+ void *owner;
+ int r;
+
+ /*
+ * For now enforce isolation only for the GFX block since we only need
+ * the cleaner shader on those rings.
+ */
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+ ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+ return NULL;
+
+ /*
+ * All submissions where enforce isolation is false are handled as if
+ * they come from a single client. Use ~0l as the owner to distinguish
+ * it from kernel submissions where the owner is NULL.
+ */
+ owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+
+ /*
+ * The "spearhead" submission is the first one which changes the
+ * ownership to its client. We always need to wait for it to be
+ * pushed to the HW before proceeding with anything.
+ */
+ if (&f->scheduled != isolation->spearhead &&
+ !dma_fence_is_signaled(isolation->spearhead)) {
+ dep = isolation->spearhead;
+ goto out_grab_ref;
+ }
+
+ if (isolation->owner != owner) {
+
+ /*
+ * Wait for any gang to be assembled before switching to a
+ * different owner or otherwise we could deadlock the
+ * submissions.
+ */
+ if (!job->gang_submit) {
+ dep = amdgpu_device_get_gang(adev);
+ if (!dma_fence_is_signaled(dep))
+ goto out_return_dep;
+ dma_fence_put(dep);
+ }
+
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(&f->scheduled);
+ amdgpu_sync_move(&isolation->active, &isolation->prev);
+ trace_amdgpu_isolation(isolation->owner, owner);
+ isolation->owner = owner;
+ }
+
+ /*
+ * Specifying the ring here helps to pipeline submissions even when
+ * isolation is enabled. If that is not desired for testing, NULL can be
+ * used instead of the ring to enforce a CPU round trip while switching
+ * between clients.
+ */
+ dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+ r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+ if (r)
+ dev_warn(adev->dev, "OOM tracking isolation\n");
+
+out_grab_ref:
+ dma_fence_get(dep);
+out_return_dep:
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ return dep;
+}
+
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ case CHIP_TOPAZ:
+ /* chips with no display hardware */
+ return false;
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ /* chips with display hardware */
+ return true;
+ default:
+ /* IP discovery */
+ if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
+ (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
+ return false;
+ return true;
+ }
+}
+
+uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
+ uint32_t inst, uint32_t reg_addr, char reg_name[],
+ uint32_t expected_value, uint32_t mask)
+{
+ uint32_t ret = 0;
+ uint32_t old_ = 0;
+ uint32_t tmp_ = RREG32(reg_addr);
+ uint32_t loop = adev->usec_timeout;
+
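+ /* Restart the timeout whenever the register value changes, so only a value that is truly stuck times out. */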
+ while ((tmp_ & (mask)) != (expected_value)) {
+ if (old_ != tmp_) {
+ loop = adev->usec_timeout;
+ old_ = tmp_;
+ } else
+ udelay(1);
+ tmp_ = RREG32(reg_addr);
+ loop--;
+ if (!loop) {
+ dev_warn(
+ adev->dev,
+ "Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
+ inst, reg_name, (uint32_t)expected_value,
+ (uint32_t)(tmp_ & (mask)));
+ ret = -ETIMEDOUT;
+ break;
+ }
+ }
+ return ret;
+}
+
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
+{
+ ssize_t size = 0;
+
+ if (!ring || !ring->adev)
+ return size;
+
+ if (amdgpu_device_should_recover_gpu(ring->adev))
+ size |= AMDGPU_RESET_TYPE_FULL;
+
+ if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
+ !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
+ size |= AMDGPU_RESET_TYPE_SOFT_RESET;
+
+ return size;
+}
+
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
+{
+ ssize_t size = 0;
+
+ if (supported_reset == 0) {
+ size += sysfs_emit_at(buf, size, "unsupported");
+ size += sysfs_emit_at(buf, size, "\n");
+ return size;
+
+ }
+
+ if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
+ size += sysfs_emit_at(buf, size, "soft ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
+ size += sysfs_emit_at(buf, size, "queue ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
+ size += sysfs_emit_at(buf, size, "pipe ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_FULL)
+ size += sysfs_emit_at(buf, size, "full ");
+
+ size += sysfs_emit_at(buf, size, "\n");
+ return size;
+}
+
+void amdgpu_device_set_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst,
+ uint64_t uid)
+{
+ if (!uid_info)
+ return;
+
+ if (type >= AMDGPU_UID_TYPE_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
+ type);
+ return;
+ }
+
+ if (inst >= AMDGPU_UID_INST_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
+ inst);
+ return;
+ }
+
+ if (uid_info->uid[type][inst] != 0) {
+ dev_warn_once(
+ uid_info->adev->dev,
+ "Overwriting existing UID %llu for type %d instance %d\n",
+ uid_info->uid[type][inst], type, inst);
+ }
+
+ uid_info->uid[type][inst] = uid;
+}
+
+u64 amdgpu_device_get_uid(struct amdgpu_uid *uid_info,
+ enum amdgpu_uid_type type, uint8_t inst)
+{
+ if (!uid_info)
+ return 0;
+
+ if (type >= AMDGPU_UID_TYPE_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID type %d\n",
+ type);
+ return 0;
+ }
+
+ if (inst >= AMDGPU_UID_INST_MAX) {
+ dev_err_once(uid_info->adev->dev, "Invalid UID instance %d\n",
+ inst);
+ return 0;
+ }
+
+ return uid_info->uid[type][inst];
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
new file mode 100644
index 000000000000..eb605e79ae0e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DF_H__
+#define __AMDGPU_DF_H__
+
+struct amdgpu_df_hash_status {
+ bool hash_64k;
+ bool hash_2m;
+ bool hash_1g;
+};
+
+struct amdgpu_df_funcs {
+ void (*sw_init)(struct amdgpu_device *adev);
+ void (*sw_fini)(struct amdgpu_device *adev);
+ void (*hw_init)(struct amdgpu_device *adev);
+ void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+ bool enable);
+ u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+ u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u64 *flags);
+ void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+ bool enable);
+ int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, int is_add);
+ int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, int is_remove);
+ void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, uint64_t *count);
+ uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
+ void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
+ uint32_t ficadl_val, uint32_t ficadh_val);
+ bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_df {
+ struct amdgpu_df_hash_status hash_status;
+ const struct amdgpu_df_funcs *funcs;
+};
+
+#endif /* __AMDGPU_DF_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
new file mode 100644
index 000000000000..fa2a22dfa048
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -0,0 +1,3241 @@
+/*
+ * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_discovery.h"
+#include "soc15_hw_ip.h"
+#include "discovery.h"
+#include "amdgpu_ras.h"
+
+#include "soc15.h"
+#include "gfx_v9_0.h"
+#include "gfx_v9_4_3.h"
+#include "gmc_v9_0.h"
+#include "df_v1_7.h"
+#include "df_v3_6.h"
+#include "df_v4_3.h"
+#include "df_v4_6_2.h"
+#include "df_v4_15.h"
+#include "nbio_v6_1.h"
+#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
+#include "nbio_v7_9.h"
+#include "nbio_v7_11.h"
+#include "hdp_v4_0.h"
+#include "vega10_ih.h"
+#include "vega20_ih.h"
+#include "sdma_v4_0.h"
+#include "sdma_v4_4_2.h"
+#include "uvd_v7_0.h"
+#include "vce_v4_0.h"
+#include "vcn_v1_0.h"
+#include "vcn_v2_5.h"
+#include "jpeg_v2_5.h"
+#include "smuio_v9_0.h"
+#include "gmc_v10_0.h"
+#include "gmc_v11_0.h"
+#include "gmc_v12_0.h"
+#include "gfxhub_v2_0.h"
+#include "mmhub_v2_0.h"
+#include "nbio_v2_3.h"
+#include "nbio_v4_3.h"
+#include "nbio_v7_2.h"
+#include "nbio_v7_7.h"
+#include "nbif_v6_3_1.h"
+#include "hdp_v5_0.h"
+#include "hdp_v5_2.h"
+#include "hdp_v6_0.h"
+#include "hdp_v7_0.h"
+#include "nv.h"
+#include "soc21.h"
+#include "soc24.h"
+#include "navi10_ih.h"
+#include "ih_v6_0.h"
+#include "ih_v6_1.h"
+#include "ih_v7_0.h"
+#include "gfx_v10_0.h"
+#include "gfx_v11_0.h"
+#include "gfx_v12_0.h"
+#include "sdma_v5_0.h"
+#include "sdma_v5_2.h"
+#include "sdma_v6_0.h"
+#include "sdma_v7_0.h"
+#include "lsdma_v6_0.h"
+#include "lsdma_v7_0.h"
+#include "vcn_v2_0.h"
+#include "jpeg_v2_0.h"
+#include "vcn_v3_0.h"
+#include "jpeg_v3_0.h"
+#include "vcn_v4_0.h"
+#include "jpeg_v4_0.h"
+#include "vcn_v4_0_3.h"
+#include "jpeg_v4_0_3.h"
+#include "vcn_v4_0_5.h"
+#include "jpeg_v4_0_5.h"
+#include "amdgpu_vkms.h"
+#include "mes_v11_0.h"
+#include "mes_v12_0.h"
+#include "smuio_v11_0.h"
+#include "smuio_v11_0_6.h"
+#include "smuio_v13_0.h"
+#include "smuio_v13_0_3.h"
+#include "smuio_v13_0_6.h"
+#include "smuio_v14_0_2.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+#include "jpeg_v5_0_0.h"
+#include "jpeg_v5_0_1.h"
+#include "amdgpu_ras_mgr.h"
+
+#include "amdgpu_vpe.h"
+#if defined(CONFIG_DRM_AMD_ISP)
+#include "amdgpu_isp.h"
+#endif
+
+MODULE_FIRMWARE("amdgpu/ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
+
+#define mmIP_DISCOVERY_VERSION 0x16A00
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+#define mmMP0_SMN_C2PMSG_33 0x16061
+#define mmMM_INDEX 0x0
+#define mmMM_INDEX_HI 0x6
+#define mmMM_DATA 0x1
+
+static const char *hw_id_names[HW_ID_MAX] = {
+ [MP1_HWID] = "MP1",
+ [MP2_HWID] = "MP2",
+ [THM_HWID] = "THM",
+ [SMUIO_HWID] = "SMUIO",
+ [FUSE_HWID] = "FUSE",
+ [CLKA_HWID] = "CLKA",
+ [PWR_HWID] = "PWR",
+ [GC_HWID] = "GC",
+ [UVD_HWID] = "UVD",
+ [AUDIO_AZ_HWID] = "AUDIO_AZ",
+ [ACP_HWID] = "ACP",
+ [DCI_HWID] = "DCI",
+ [DMU_HWID] = "DMU",
+ [DCO_HWID] = "DCO",
+ [DIO_HWID] = "DIO",
+ [XDMA_HWID] = "XDMA",
+ [DCEAZ_HWID] = "DCEAZ",
+ [DAZ_HWID] = "DAZ",
+ [SDPMUX_HWID] = "SDPMUX",
+ [NTB_HWID] = "NTB",
+ [IOHC_HWID] = "IOHC",
+ [L2IMU_HWID] = "L2IMU",
+ [VCE_HWID] = "VCE",
+ [MMHUB_HWID] = "MMHUB",
+ [ATHUB_HWID] = "ATHUB",
+ [DBGU_NBIO_HWID] = "DBGU_NBIO",
+ [DFX_HWID] = "DFX",
+ [DBGU0_HWID] = "DBGU0",
+ [DBGU1_HWID] = "DBGU1",
+ [OSSSYS_HWID] = "OSSSYS",
+ [HDP_HWID] = "HDP",
+ [SDMA0_HWID] = "SDMA0",
+ [SDMA1_HWID] = "SDMA1",
+ [SDMA2_HWID] = "SDMA2",
+ [SDMA3_HWID] = "SDMA3",
+ [LSDMA_HWID] = "LSDMA",
+ [ISP_HWID] = "ISP",
+ [DBGU_IO_HWID] = "DBGU_IO",
+ [DF_HWID] = "DF",
+ [CLKB_HWID] = "CLKB",
+ [FCH_HWID] = "FCH",
+ [DFX_DAP_HWID] = "DFX_DAP",
+ [L1IMU_PCIE_HWID] = "L1IMU_PCIE",
+ [L1IMU_NBIF_HWID] = "L1IMU_NBIF",
+ [L1IMU_IOAGR_HWID] = "L1IMU_IOAGR",
+ [L1IMU3_HWID] = "L1IMU3",
+ [L1IMU4_HWID] = "L1IMU4",
+ [L1IMU5_HWID] = "L1IMU5",
+ [L1IMU6_HWID] = "L1IMU6",
+ [L1IMU7_HWID] = "L1IMU7",
+ [L1IMU8_HWID] = "L1IMU8",
+ [L1IMU9_HWID] = "L1IMU9",
+ [L1IMU10_HWID] = "L1IMU10",
+ [L1IMU11_HWID] = "L1IMU11",
+ [L1IMU12_HWID] = "L1IMU12",
+ [L1IMU13_HWID] = "L1IMU13",
+ [L1IMU14_HWID] = "L1IMU14",
+ [L1IMU15_HWID] = "L1IMU15",
+ [WAFLC_HWID] = "WAFLC",
+ [FCH_USB_PD_HWID] = "FCH_USB_PD",
+ [PCIE_HWID] = "PCIE",
+ [PCS_HWID] = "PCS",
+ [DDCL_HWID] = "DDCL",
+ [SST_HWID] = "SST",
+ [IOAGR_HWID] = "IOAGR",
+ [NBIF_HWID] = "NBIF",
+ [IOAPIC_HWID] = "IOAPIC",
+ [SYSTEMHUB_HWID] = "SYSTEMHUB",
+ [NTBCCP_HWID] = "NTBCCP",
+ [UMC_HWID] = "UMC",
+ [SATA_HWID] = "SATA",
+ [USB_HWID] = "USB",
+ [CCXSEC_HWID] = "CCXSEC",
+ [XGMI_HWID] = "XGMI",
+ [XGBE_HWID] = "XGBE",
+ [MP0_HWID] = "MP0",
+ [VPE_HWID] = "VPE",
+};
+
+static int hw_id_map[MAX_HWIP] = {
+ [GC_HWIP] = GC_HWID,
+ [HDP_HWIP] = HDP_HWID,
+ [SDMA0_HWIP] = SDMA0_HWID,
+ [SDMA1_HWIP] = SDMA1_HWID,
+ [SDMA2_HWIP] = SDMA2_HWID,
+ [SDMA3_HWIP] = SDMA3_HWID,
+ [LSDMA_HWIP] = LSDMA_HWID,
+ [MMHUB_HWIP] = MMHUB_HWID,
+ [ATHUB_HWIP] = ATHUB_HWID,
+ [NBIO_HWIP] = NBIF_HWID,
+ [MP0_HWIP] = MP0_HWID,
+ [MP1_HWIP] = MP1_HWID,
+ [UVD_HWIP] = UVD_HWID,
+ [VCE_HWIP] = VCE_HWID,
+ [DF_HWIP] = DF_HWID,
+ [DCE_HWIP] = DMU_HWID,
+ [OSSSYS_HWIP] = OSSSYS_HWID,
+ [SMUIO_HWIP] = SMUIO_HWID,
+ [PWR_HWIP] = PWR_HWID,
+ [NBIF_HWIP] = NBIF_HWID,
+ [THM_HWIP] = THM_HWID,
+ [CLK_HWIP] = CLKA_HWID,
+ [UMC_HWIP] = UMC_HWID,
+ [XGMI_HWIP] = XGMI_HWID,
+ [DCI_HWIP] = DCI_HWID,
+ [PCIE_HWIP] = PCIE_HWID,
+ [VPE_HWIP] = VPE_HWID,
+ [ISP_HWIP] = ISP_HWID,
+};
+
+static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary)
+{
+ u64 tmr_offset, tmr_size, pos;
+ void *discv_regn;
+ int ret;
+
+ ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size);
+ if (ret)
+ return ret;
+
+ pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET;
+
+ /* This region is read-only and reserved from system use */
+ discv_regn = memremap(pos, adev->discovery.size, MEMREMAP_WC);
+ if (discv_regn) {
+ memcpy(binary, discv_regn, adev->discovery.size);
+ memunmap(discv_regn);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+#define IP_DISCOVERY_V2 2
+#define IP_DISCOVERY_V4 4
+
+static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
+ uint8_t *binary)
+{
+ bool sz_valid = true;
+ uint64_t vram_size;
+ int i, ret = 0;
+ u32 msg;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* It can take up to two seconds for IFWI init to complete on some dGPUs,
+ * but generally it should be in the 60-100ms range. Normally this starts
+ * as soon as the device gets power, so by the time the OS loads it has long
+ * since completed. However, when a card is hotplugged via e.g., USB4, we need to
+ * wait for this to complete. Once the C2PMSG is updated, we can
+ * continue.
+ */
+
+ for (i = 0; i < 2000; i++) {
+ msg = RREG32(mmMP0_SMN_C2PMSG_33);
+ if (msg & 0x80000000)
+ break;
+ msleep(1);
+ }
+ }
+
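+ /* RCC_CONFIG_MEMSIZE reports VRAM size in MB (converted to bytes below); 0 or all ones means the value is unusable. */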
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ sz_valid = false;
+ else
+ vram_size <<= 20;
+
+ /*
+ * If in VRAM, discovery TMR is marked for reservation. If it is in system mem,
+ * then it is not required to be reserved.
+ */
+ if (sz_valid) {
+ if (amdgpu_sriov_vf(adev) && adev->virt.is_dynamic_crit_regn_enabled) {
+ /* For SRIOV VFs with dynamic critical region enabled,
+ * we will get the IPD binary via below call.
+ * If dynamic critical is disabled, fall through to normal seq.
+ */
+ if (amdgpu_virt_get_dynamic_data_info(adev,
+ AMD_SRIOV_MSG_IPD_TABLE_ID, binary,
+ &adev->discovery.size)) {
+ dev_err(adev->dev,
+ "failed to read discovery info from dynamic critical region.");
+ ret = -EINVAL;
+ goto exit;
+ }
+ } else {
+ uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
+
+ amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
+ adev->discovery.size, false);
+ adev->discovery.reserve_tmr = true;
+ }
+ } else {
+ ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
+ }
+
+ if (ret)
+ dev_err(adev->dev,
+ "failed to read discovery info from memory, vram size read: %llx",
+ vram_size);
+exit:
+ return ret;
+}
+
+static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
+ uint8_t *binary,
+ const char *fw_name)
+{
+ const struct firmware *fw;
+ int r;
+
+ r = firmware_request_nowarn(&fw, fw_name, adev->dev);
+ if (r) {
+ if (amdgpu_discovery == 2)
+ dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name);
+ else
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name);
+ return r;
+ }
+
+ memcpy((u8 *)binary, (u8 *)fw->data, fw->size);
+ release_firmware(fw);
+
+ return 0;
+}
+
+static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size)
+{
+ uint16_t checksum = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ checksum += data[i];
+
+ return checksum;
+}
+
+static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
+ uint16_t expected)
+{
+ return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
+}
+
+static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary)
+{
+ struct binary_header *bhdr;
+ bhdr = (struct binary_header *)binary;
+
+ return (le32_to_cpu(bhdr->binary_signature) == BINARY_SIGNATURE);
+}
+
+static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
+{
+ /*
+ * So far, apply this quirk only on those Navy Flounder boards which
+ * have a bad harvest table for the VCN config.
+ */
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 1) == IP_VERSION(3, 0, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 2))) {
+ switch (adev->pdev->revision) {
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC5:
+ case 0xC7:
+ case 0xCF:
+ case 0xDF:
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
+ struct binary_header *bhdr)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct table_info *info;
+ uint16_t checksum;
+ uint16_t offset;
+
+ info = &bhdr->table_list[NPS_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ struct nps_info_header *nhdr =
+ (struct nps_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) {
+ dev_dbg(adev->dev, "invalid ip discovery nps info table id\n");
+ return -EINVAL;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le32_to_cpu(nhdr->size_bytes),
+ checksum)) {
+ dev_dbg(adev->dev, "invalid nps info data table checksum\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
+{
+ if (amdgpu_discovery == 2) {
+ /* Assume there is valid discovery TMR in VRAM even if binary is sideloaded */
+ adev->discovery.reserve_tmr = true;
+ return "amdgpu/ip_discovery.bin";
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "amdgpu/vega10_ip_discovery.bin";
+ case CHIP_VEGA12:
+ return "amdgpu/vega12_ip_discovery.bin";
+ case CHIP_RAVEN:
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "amdgpu/raven2_ip_discovery.bin";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "amdgpu/picasso_ip_discovery.bin";
+ else
+ return "amdgpu/raven_ip_discovery.bin";
+ case CHIP_VEGA20:
+ return "amdgpu/vega20_ip_discovery.bin";
+ case CHIP_ARCTURUS:
+ return "amdgpu/arcturus_ip_discovery.bin";
+ case CHIP_ALDEBARAN:
+ return "amdgpu/aldebaran_ip_discovery.bin";
+ default:
+ return NULL;
+ }
+}
+
+static int amdgpu_discovery_init(struct amdgpu_device *adev)
+{
+ struct table_info *info;
+ struct binary_header *bhdr;
+ uint8_t *discovery_bin;
+ const char *fw_name;
+ uint16_t offset;
+ uint16_t size;
+ uint16_t checksum;
+ int r;
+
+ adev->discovery.bin = kzalloc(DISCOVERY_TMR_SIZE, GFP_KERNEL);
+ if (!adev->discovery.bin)
+ return -ENOMEM;
+ adev->discovery.size = DISCOVERY_TMR_SIZE;
+ adev->discovery.debugfs_blob.data = adev->discovery.bin;
+ adev->discovery.debugfs_blob.size = adev->discovery.size;
+
+ discovery_bin = adev->discovery.bin;
+ /* Read from file if it is the preferred option */
+ fw_name = amdgpu_discovery_get_fw_name(adev);
+ if (fw_name != NULL) {
+ drm_dbg(&adev->ddev, "use ip discovery information from file");
+ r = amdgpu_discovery_read_binary_from_file(adev, discovery_bin,
+ fw_name);
+ if (r)
+ goto out;
+ } else {
+ drm_dbg(&adev->ddev, "use ip discovery information from memory");
+ r = amdgpu_discovery_read_binary_from_mem(adev, discovery_bin);
+ if (r)
+ goto out;
+ }
+
+ /* check the ip discovery binary signature */
+ if (!amdgpu_discovery_verify_binary_signature(discovery_bin)) {
+ dev_err(adev->dev,
+ "get invalid ip discovery binary signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+
+ offset = offsetof(struct binary_header, binary_checksum) +
+ sizeof(bhdr->binary_checksum);
+ size = le16_to_cpu(bhdr->binary_size) - offset;
+ checksum = le16_to_cpu(bhdr->binary_checksum);
+
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, size,
+ checksum)) {
+ dev_err(adev->dev, "invalid ip discovery binary checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ info = &bhdr->table_list[IP_DISCOVERY];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct ip_discovery_header *ihdr =
+ (struct ip_discovery_header *)(discovery_bin + offset);
+ if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery data table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le16_to_cpu(ihdr->size),
+ checksum)) {
+ dev_err(adev->dev, "invalid ip discovery data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[GC];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct gpu_info_header *ghdr =
+ (struct gpu_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery gc table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
+ le32_to_cpu(ghdr->size),
+ checksum)) {
+ dev_err(adev->dev, "invalid gc data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[HARVEST_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct harvest_info_header *hhdr =
+ (struct harvest_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ sizeof(struct harvest_table), checksum)) {
+ dev_err(adev->dev, "invalid harvest data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[VCN_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct vcn_info_header *vhdr =
+ (struct vcn_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery vcn table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(vhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid vcn data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[MALL_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (0 && offset) {
+ struct mall_info_header *mhdr =
+ (struct mall_info_header *)(discovery_bin + offset);
+
+ if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery mall table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(
+ discovery_bin + offset,
+ le32_to_cpu(mhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid mall data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ return 0;
+
+out:
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
+ if ((amdgpu_discovery != 2) &&
+ (RREG32(mmIP_DISCOVERY_VERSION) == 4))
+ amdgpu_ras_query_boot_status(adev, 4);
+ return r;
+}
+
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
+
+void amdgpu_discovery_fini(struct amdgpu_device *adev)
+{
+ amdgpu_discovery_sysfs_fini(adev);
+ kfree(adev->discovery.bin);
+ adev->discovery.bin = NULL;
+}
+
+static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
+ uint8_t instance, uint16_t hw_id)
+{
+ if (instance >= HWIP_MAX_INSTANCE) {
+ dev_err(adev->dev,
+ "Unexpected instance_number (%d) from ip discovery blob\n",
+ instance);
+ return -EINVAL;
+ }
+ if (hw_id >= HW_ID_MAX) {
+ dev_err(adev->dev,
+ "Unexpected hw_id (%d) from ip discovery blob\n",
+ hw_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
+ uint32_t *vcn_harvest_count)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct ip *ip;
+ uint16_t die_offset, ip_offset, num_dies, num_ips;
+ uint16_t hw_id;
+ uint8_t inst;
+ int i, j;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ /* scan harvest bit of all IP data structures */
+ for (i = 0; i < num_dies; i++) {
+ die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ for (j = 0; j < num_ips; j++) {
+ ip = (struct ip *)(discovery_bin + ip_offset);
+ inst = ip->number_instance;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
+ goto next_ip;
+
+ if (ip->harvest == 1) {
+ switch (hw_id) {
+ case VCN_HWID:
+ (*vcn_harvest_count)++;
+ if (inst == 0) {
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN0;
+ } else {
+ adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+ adev->vcn.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ adev->jpeg.inst_mask &=
+ ~AMDGPU_VCN_HARVEST_VCN1;
+ }
+ break;
+ case DMU_HWID:
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ break;
+ default:
+ break;
+ }
+ }
+next_ip:
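+ /* IP entries are variable length; advance past this entry including its base-address array. */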
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
+ }
+ }
+}
+
+static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
+ uint32_t *vcn_harvest_count,
+ uint32_t *umc_harvest_count)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ struct harvest_table *harvest_info;
+ u16 offset;
+ int i;
+ uint32_t umc_harvest_config = 0;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
+
+ if (!offset) {
+ dev_err(adev->dev, "invalid harvest table offset\n");
+ return;
+ }
+
+ harvest_info = (struct harvest_table *)(discovery_bin + offset);
+
+ for (i = 0; i < 32; i++) {
+ if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
+ break;
+
+ switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
+ case VCN_HWID:
+ (*vcn_harvest_count)++;
+ adev->vcn.harvest_config |=
+ (1 << harvest_info->list[i].number_instance);
+ adev->jpeg.harvest_config |=
+ (1 << harvest_info->list[i].number_instance);
+
+ adev->vcn.inst_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ adev->jpeg.inst_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+ case DMU_HWID:
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+ break;
+ case UMC_HWID:
+ umc_harvest_config |=
+ 1 << (le16_to_cpu(harvest_info->list[i].number_instance));
+ (*umc_harvest_count)++;
+ break;
+ case GC_HWID:
+ adev->gfx.xcc_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+ case SDMA0_HWID:
+ adev->sdma.sdma_mask &=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+#if defined(CONFIG_DRM_AMD_ISP)
+ case ISP_HWID:
+ adev->isp.harvest_config |=
+ ~(1U << harvest_info->list[i].number_instance);
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+
+ adev->umc.active_mask = ((1 << adev->umc.node_inst_num) - 1) &
+ ~umc_harvest_config;
+}
+
+/* ================================================== */
+
+struct ip_hw_instance {
+ struct kobject kobj; /* ip_discovery/die/#die/#hw_id/#instance/<attrs...> */
+
+ int hw_id;
+ u8 num_instance;
+ u8 major, minor, revision;
+ u8 harvest;
+
+ int num_base_addresses;
+ u32 base_addr[] __counted_by(num_base_addresses);
+};
+
+struct ip_hw_id {
+ struct kset hw_id_kset; /* ip_discovery/die/#die/#hw_id/, contains ip_hw_instance */
+ int hw_id;
+};
+
+struct ip_die_entry {
+ struct kset ip_kset; /* ip_discovery/die/#die/, contains ip_hw_id */
+ u16 num_ips;
+};
+
+/* -------------------------------------------------- */
+
+struct ip_hw_instance_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct ip_hw_instance *ip_hw_instance, char *buf);
+};
+
+static ssize_t hw_id_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->hw_id);
+}
+
+static ssize_t num_instance_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->num_instance);
+}
+
+static ssize_t major_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->major);
+}
+
+static ssize_t minor_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->minor);
+}
+
+static ssize_t revision_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->revision);
+}
+
+static ssize_t harvest_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "0x%01X\n", ip_hw_instance->harvest);
+}
+
+static ssize_t num_base_addresses_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_hw_instance->num_base_addresses);
+}
+
+static ssize_t base_addr_show(struct ip_hw_instance *ip_hw_instance, char *buf)
+{
+ ssize_t res, at;
+ int ii;
+
+ for (res = at = ii = 0; ii < ip_hw_instance->num_base_addresses; ii++) {
+ /* Here we satisfy the condition that at + size <= PAGE_SIZE.
+ */
+ if (at + 12 > PAGE_SIZE)
+ break;
+ res = sysfs_emit_at(buf, at, "0x%08X\n",
+ ip_hw_instance->base_addr[ii]);
+ if (res <= 0)
+ break;
+ at += res;
+ }
+
+ return res < 0 ? res : at;
+}
+
+static struct ip_hw_instance_attr ip_hw_attr[] = {
+ __ATTR_RO(hw_id),
+ __ATTR_RO(num_instance),
+ __ATTR_RO(major),
+ __ATTR_RO(minor),
+ __ATTR_RO(revision),
+ __ATTR_RO(harvest),
+ __ATTR_RO(num_base_addresses),
+ __ATTR_RO(base_addr),
+};
+
+static struct attribute *ip_hw_instance_attrs[ARRAY_SIZE(ip_hw_attr) + 1];
+ATTRIBUTE_GROUPS(ip_hw_instance);
+
+#define to_ip_hw_instance(x) container_of(x, struct ip_hw_instance, kobj)
+#define to_ip_hw_instance_attr(x) container_of(x, struct ip_hw_instance_attr, attr)
+
+static ssize_t ip_hw_instance_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
+ struct ip_hw_instance_attr *ip_hw_attr = to_ip_hw_instance_attr(attr);
+
+ if (!ip_hw_attr->show)
+ return -EIO;
+
+ return ip_hw_attr->show(ip_hw_instance, buf);
+}
+
+static const struct sysfs_ops ip_hw_instance_sysfs_ops = {
+ .show = ip_hw_instance_attr_show,
+};
+
+static void ip_hw_instance_release(struct kobject *kobj)
+{
+ struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
+
+ kfree(ip_hw_instance);
+}
+
+static const struct kobj_type ip_hw_instance_ktype = {
+ .release = ip_hw_instance_release,
+ .sysfs_ops = &ip_hw_instance_sysfs_ops,
+ .default_groups = ip_hw_instance_groups,
+};
+
+/* -------------------------------------------------- */
+
+#define to_ip_hw_id(x) container_of(to_kset(x), struct ip_hw_id, hw_id_kset)
+
+static void ip_hw_id_release(struct kobject *kobj)
+{
+ struct ip_hw_id *ip_hw_id = to_ip_hw_id(kobj);
+
+ if (!list_empty(&ip_hw_id->hw_id_kset.list))
+ DRM_ERROR("ip_hw_id->hw_id_kset is not empty");
+ kfree(ip_hw_id);
+}
+
+static const struct kobj_type ip_hw_id_ktype = {
+ .release = ip_hw_id_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/* -------------------------------------------------- */
+
+static void die_kobj_release(struct kobject *kobj);
+static void ip_disc_release(struct kobject *kobj);
+
+struct ip_die_entry_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ip_die_entry *ip_die_entry, char *buf);
+};
+
+#define to_ip_die_entry_attr(x) container_of(x, struct ip_die_entry_attribute, attr)
+
+static ssize_t num_ips_show(struct ip_die_entry *ip_die_entry, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", ip_die_entry->num_ips);
+}
+
+/* If there are more ip_die_entry attrs, other than the number of IPs,
+ * we can make this into an array of attrs, and then initialize
+ * ip_die_entry_attrs in a loop.
+ */
+static struct ip_die_entry_attribute num_ips_attr =
+ __ATTR_RO(num_ips);
+
+static struct attribute *ip_die_entry_attrs[] = {
+ &num_ips_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(ip_die_entry); /* ip_die_entry_groups */
+
+#define to_ip_die_entry(x) container_of(to_kset(x), struct ip_die_entry, ip_kset)
+
+static ssize_t ip_die_entry_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ip_die_entry_attribute *ip_die_entry_attr = to_ip_die_entry_attr(attr);
+ struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj);
+
+ if (!ip_die_entry_attr->show)
+ return -EIO;
+
+ return ip_die_entry_attr->show(ip_die_entry, buf);
+}
+
+static void ip_die_entry_release(struct kobject *kobj)
+{
+ struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj);
+
+ if (!list_empty(&ip_die_entry->ip_kset.list))
+ DRM_ERROR("ip_die_entry->ip_kset is not empty");
+ kfree(ip_die_entry);
+}
+
+static const struct sysfs_ops ip_die_entry_sysfs_ops = {
+ .show = ip_die_entry_attr_show,
+};
+
+static const struct kobj_type ip_die_entry_ktype = {
+ .release = ip_die_entry_release,
+ .sysfs_ops = &ip_die_entry_sysfs_ops,
+ .default_groups = ip_die_entry_groups,
+};
+
+static const struct kobj_type die_kobj_ktype = {
+ .release = die_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static const struct kobj_type ip_discovery_ktype = {
+ .release = ip_disc_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+struct ip_discovery_top {
+ struct kobject kobj; /* ip_discovery/ */
+ struct kset die_kset; /* ip_discovery/die/, contains ip_die_entry */
+ struct amdgpu_device *adev;
+};
+
+static void die_kobj_release(struct kobject *kobj)
+{
+ struct ip_discovery_top *ip_top = container_of(to_kset(kobj),
+ struct ip_discovery_top,
+ die_kset);
+ if (!list_empty(&ip_top->die_kset.list))
+ DRM_ERROR("ip_top->die_kset is not empty");
+}
+
+static void ip_disc_release(struct kobject *kobj)
+{
+ struct ip_discovery_top *ip_top = container_of(kobj, struct ip_discovery_top,
+ kobj);
+ struct amdgpu_device *adev = ip_top->adev;
+
+ kfree(ip_top);
+ adev->discovery.ip_top = NULL;
+}
+
+static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
+ uint16_t hw_id, uint8_t inst)
+{
+ uint8_t harvest = 0;
+
+ /* Until a uniform way is figured out, get the harvest mask based on hw_id */
+ switch (hw_id) {
+ case VCN_HWID:
+ /* VCN vs UVD+VCE */
+ if (!amdgpu_ip_version(adev, VCE_HWIP, 0))
+ harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
+ break;
+ case DMU_HWID:
+ if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
+ harvest = 0x1;
+ break;
+ case UMC_HWID:
+ /* TODO: UMC harvest info needs separate parsing; ignore for now. */
+ break;
+ case GC_HWID:
+ harvest = ((1 << inst) & adev->gfx.xcc_mask) == 0;
+ break;
+ case SDMA0_HWID:
+ harvest = ((1 << inst) & adev->sdma.sdma_mask) == 0;
+ break;
+ default:
+ break;
+ }
+
+ return harvest;
+}
+
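+/* Walk the IP list of one die and populate the sysfs hierarchy below it:
+ * for each hardware ID found, register a #hw_id kset (plus a readable
+ * symlink when hw_id_names[] provides one) and add one kobject per
+ * discovered instance underneath it.
+ */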
+static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
+ struct ip_die_entry *ip_die_entry,
+ const size_t _ip_offset, const int num_ips,
+ bool reg_base_64)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ int ii, jj, kk, res;
+ uint16_t hw_id;
+ uint8_t inst;
+
+ DRM_DEBUG("num_ips:%d", num_ips);
+
+ /* Find all IPs of a given HW ID, and add their instance to
+ * #die/#hw_id/#instance/<attributes>
+ */
+ for (ii = 0; ii < HW_ID_MAX; ii++) {
+ struct ip_hw_id *ip_hw_id = NULL;
+ size_t ip_offset = _ip_offset;
+
+ for (jj = 0; jj < num_ips; jj++) {
+ struct ip_v4 *ip;
+ struct ip_hw_instance *ip_hw_instance;
+
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id) ||
+ hw_id != ii)
+ goto next_ip;
+
+ DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset);
+
+ /* We have a hw_id match; register the hw
+ * block if not yet registered.
+ */
+ if (!ip_hw_id) {
+ ip_hw_id = kzalloc(sizeof(*ip_hw_id), GFP_KERNEL);
+ if (!ip_hw_id)
+ return -ENOMEM;
+ ip_hw_id->hw_id = ii;
+
+ kobject_set_name(&ip_hw_id->hw_id_kset.kobj, "%d", ii);
+ ip_hw_id->hw_id_kset.kobj.kset = &ip_die_entry->ip_kset;
+ ip_hw_id->hw_id_kset.kobj.ktype = &ip_hw_id_ktype;
+ res = kset_register(&ip_hw_id->hw_id_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register ip_hw_id kset");
+ kfree(ip_hw_id);
+ return res;
+ }
+ if (hw_id_names[ii]) {
+ res = sysfs_create_link(&ip_die_entry->ip_kset.kobj,
+ &ip_hw_id->hw_id_kset.kobj,
+ hw_id_names[ii]);
+ if (res) {
+ DRM_ERROR("Couldn't create IP link %s in IP Die:%s\n",
+ hw_id_names[ii],
+ kobject_name(&ip_die_entry->ip_kset.kobj));
+ }
+ }
+ }
+
+ /* Now register its instance.
+ */
+ ip_hw_instance = kzalloc(struct_size(ip_hw_instance,
+ base_addr,
+ ip->num_base_address),
+ GFP_KERNEL);
+ if (!ip_hw_instance) {
+ DRM_ERROR("no memory for ip_hw_instance");
+ return -ENOMEM;
+ }
+ ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */
+ ip_hw_instance->num_instance = ip->instance_number;
+ ip_hw_instance->major = ip->major;
+ ip_hw_instance->minor = ip->minor;
+ ip_hw_instance->revision = ip->revision;
+ ip_hw_instance->harvest =
+ amdgpu_discovery_get_harvest_info(
+ adev, ip_hw_instance->hw_id,
+ ip_hw_instance->num_instance);
+ ip_hw_instance->num_base_addresses = ip->num_base_address;
+
+ for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) {
+ if (reg_base_64)
+ ip_hw_instance->base_addr[kk] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF;
+ else
+ ip_hw_instance->base_addr[kk] = ip->base_address[kk];
+ }
+
+ kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype);
+ ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset;
+ res = kobject_add(&ip_hw_instance->kobj, NULL,
+ "%d", ip_hw_instance->num_instance);
+next_ip:
+ if (reg_base_64)
+ ip_offset += struct_size(ip, base_address_64,
+ ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
+ }
+ }
+
+ return 0;
+}
+
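+/* Iterate over every die described in the discovery binary, register one
+ * ip_die_entry kset per die under the die/ kset, and then descend into the
+ * die's IP list via amdgpu_discovery_sysfs_ips().
+ */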
+static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev)
+{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct kset *die_kset = &ip_top->die_kset;
+ u16 num_dies, die_offset, num_ips;
+ size_t ip_offset;
+ int ii, res;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ DRM_DEBUG("number of dies: %d\n", num_dies);
+
+ for (ii = 0; ii < num_dies; ii++) {
+ struct ip_die_entry *ip_die_entry;
+
+ die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ /* Add the die to the kset.
+ *
+ * dhdr->die_id == ii, which was checked in
+ * amdgpu_discovery_reg_base_init().
+ */
+
+ ip_die_entry = kzalloc(sizeof(*ip_die_entry), GFP_KERNEL);
+ if (!ip_die_entry)
+ return -ENOMEM;
+
+ ip_die_entry->num_ips = num_ips;
+
+ kobject_set_name(&ip_die_entry->ip_kset.kobj, "%d", le16_to_cpu(dhdr->die_id));
+ ip_die_entry->ip_kset.kobj.kset = die_kset;
+ ip_die_entry->ip_kset.kobj.ktype = &ip_die_entry_ktype;
+ res = kset_register(&ip_die_entry->ip_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register ip_die_entry kset");
+ kfree(ip_die_entry);
+ return res;
+ }
+
+ amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips, !!ihdr->base_addr_64_bit);
+ }
+
+ return 0;
+}
+
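+/* Create the top-level ip_discovery/ kobject and its die/ kset under the
+ * device's sysfs directory, then walk the discovery binary to expose
+ * ip_discovery/die/#die/#hw_id/#instance/<attributes>.
+ */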
+static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct ip_discovery_top *ip_top;
+ struct kset *die_kset;
+ int res, ii;
+
+ if (!discovery_bin)
+ return -EINVAL;
+
+ ip_top = kzalloc(sizeof(*ip_top), GFP_KERNEL);
+ if (!ip_top)
+ return -ENOMEM;
+
+ ip_top->adev = adev;
+ adev->discovery.ip_top = ip_top;
+ res = kobject_init_and_add(&ip_top->kobj, &ip_discovery_ktype,
+ &adev->dev->kobj, "ip_discovery");
+ if (res) {
+ DRM_ERROR("Couldn't init and add ip_discovery/");
+ goto Err;
+ }
+
+ die_kset = &ip_top->die_kset;
+ kobject_set_name(&die_kset->kobj, "%s", "die");
+ die_kset->kobj.parent = &ip_top->kobj;
+ die_kset->kobj.ktype = &die_kobj_ktype;
+ res = kset_register(&ip_top->die_kset);
+ if (res) {
+ DRM_ERROR("Couldn't register die_kset");
+ goto Err;
+ }
+
+ for (ii = 0; ii < ARRAY_SIZE(ip_hw_attr); ii++)
+ ip_hw_instance_attrs[ii] = &ip_hw_attr[ii].attr;
+ ip_hw_instance_attrs[ii] = NULL;
+
+ res = amdgpu_discovery_sysfs_recurse(adev);
+
+ return res;
+Err:
+ kobject_put(&ip_top->kobj);
+ return res;
+}
+
+/* -------------------------------------------------- */
+
+#define list_to_kobj(el) container_of(el, struct kobject, entry)
+
+static void amdgpu_discovery_sysfs_ip_hw_free(struct ip_hw_id *ip_hw_id)
+{
+ struct list_head *el, *tmp;
+ struct kset *hw_id_kset;
+
+ hw_id_kset = &ip_hw_id->hw_id_kset;
+ spin_lock(&hw_id_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &hw_id_kset->list) {
+ list_del_init(el);
+ spin_unlock(&hw_id_kset->list_lock);
+ /* kobject is embedded in ip_hw_instance */
+ kobject_put(list_to_kobj(el));
+ spin_lock(&hw_id_kset->list_lock);
+ }
+ spin_unlock(&hw_id_kset->list_lock);
+ kobject_put(&ip_hw_id->hw_id_kset.kobj);
+}
+
+static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry)
+{
+ struct list_head *el, *tmp;
+ struct kset *ip_kset;
+
+ ip_kset = &ip_die_entry->ip_kset;
+ spin_lock(&ip_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &ip_kset->list) {
+ list_del_init(el);
+ spin_unlock(&ip_kset->list_lock);
+ amdgpu_discovery_sysfs_ip_hw_free(to_ip_hw_id(list_to_kobj(el)));
+ spin_lock(&ip_kset->list_lock);
+ }
+ spin_unlock(&ip_kset->list_lock);
+ kobject_put(&ip_die_entry->ip_kset.kobj);
+}
+
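+/* Tear down the ip_discovery sysfs hierarchy bottom-up: drop every instance
+ * kobject, then every hw_id kset, then every die entry, and finally the
+ * die/ kset and the top-level ip_discovery kobject.
+ */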
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
+{
+ struct ip_discovery_top *ip_top = adev->discovery.ip_top;
+ struct list_head *el, *tmp;
+ struct kset *die_kset;
+
+ die_kset = &ip_top->die_kset;
+ spin_lock(&die_kset->list_lock);
+ list_for_each_prev_safe(el, tmp, &die_kset->list) {
+ list_del_init(el);
+ spin_unlock(&die_kset->list_lock);
+ amdgpu_discovery_sysfs_die_free(to_ip_die_entry(list_to_kobj(el)));
+ spin_lock(&die_kset->list_lock);
+ }
+ spin_unlock(&die_kset->list_lock);
+ kobject_put(&ip_top->die_kset.kobj);
+ kobject_put(&ip_top->kobj);
+}
+
+/* ================================================== */
+
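+/* Parse the discovery binary and initialize the per-IP register base
+ * offsets in adev->reg_offset[], the IP versions in adev->ip_versions[],
+ * and the instance masks/counts for VCN, JPEG, SDMA, VPE, UMC and GC.
+ */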
+static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+{
+ uint8_t num_base_address, subrev, variant;
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ uint8_t *discovery_bin;
+ struct ip_v4 *ip;
+ uint16_t die_offset;
+ uint16_t ip_offset;
+ uint16_t num_dies;
+ uint32_t wafl_ver;
+ uint16_t num_ips;
+ uint16_t hw_id;
+ uint8_t inst;
+ int hw_ip;
+ int i, j, k;
+ int r;
+
+ r = amdgpu_discovery_init(adev);
+ if (r)
+ return r;
+ discovery_bin = adev->discovery.bin;
+ wafl_ver = 0;
+ adev->gfx.xcc_mask = 0;
+ adev->sdma.sdma_mask = 0;
+ adev->vcn.inst_mask = 0;
+ adev->jpeg.inst_mask = 0;
+ bhdr = (struct binary_header *)discovery_bin;
+ ihdr = (struct ip_discovery_header
+ *)(discovery_bin +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ DRM_DEBUG("number of dies: %d\n", num_dies);
+
+ for (i = 0; i < num_dies; i++) {
+ die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+ dhdr = (struct die_header *)(discovery_bin + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ if (le16_to_cpu(dhdr->die_id) != i) {
+ DRM_ERROR("invalid die id %d, expected %d\n",
+ le16_to_cpu(dhdr->die_id), i);
+ return -EINVAL;
+ }
+
+ DRM_DEBUG("number of hardware IPs on die%d: %d\n",
+ le16_to_cpu(dhdr->die_id), num_ips);
+
+ for (j = 0; j < num_ips; j++) {
+ ip = (struct ip_v4 *)(discovery_bin + ip_offset);
+
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
+ goto next_ip;
+
+ num_base_address = ip->num_base_address;
+
+ DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
+ hw_id_names[le16_to_cpu(ip->hw_id)],
+ le16_to_cpu(ip->hw_id),
+ ip->instance_number,
+ ip->major, ip->minor,
+ ip->revision);
+
+ if (le16_to_cpu(ip->hw_id) == VCN_HWID) {
+ /* Bit [5:0]: original revision value
+ * Bit [7:6]: en/decode capability:
+ * 0b00 : VCN function normally
+ * 0b10 : encode is disabled
+ * 0b01 : decode is disabled
+ */
+ if (adev->vcn.num_vcn_inst <
+ AMDGPU_MAX_VCN_INSTANCES) {
+ adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config =
+ ip->revision & 0xc0;
+ adev->vcn.num_vcn_inst++;
+ adev->vcn.inst_mask |=
+ (1U << ip->instance_number);
+ adev->jpeg.inst_mask |=
+ (1U << ip->instance_number);
+ } else {
+ dev_err(adev->dev, "Too many VCN instances: %d vs %d\n",
+ adev->vcn.num_vcn_inst + 1,
+ AMDGPU_MAX_VCN_INSTANCES);
+ }
+ ip->revision &= ~0xc0;
+ }
+ if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA1_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA2_HWID ||
+ le16_to_cpu(ip->hw_id) == SDMA3_HWID) {
+ if (adev->sdma.num_instances <
+ AMDGPU_MAX_SDMA_INSTANCES) {
+ adev->sdma.num_instances++;
+ adev->sdma.sdma_mask |=
+ (1U << ip->instance_number);
+ } else {
+ dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n",
+ adev->sdma.num_instances + 1,
+ AMDGPU_MAX_SDMA_INSTANCES);
+ }
+ }
+
+ if (le16_to_cpu(ip->hw_id) == VPE_HWID) {
+ if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES)
+ adev->vpe.num_instances++;
+ else
+ dev_err(adev->dev, "Too many VPE instances: %d vs %d\n",
+ adev->vpe.num_instances + 1,
+ AMDGPU_MAX_VPE_INSTANCES);
+ }
+
+ if (le16_to_cpu(ip->hw_id) == UMC_HWID) {
+ adev->gmc.num_umc++;
+ adev->umc.node_inst_num++;
+ }
+
+ if (le16_to_cpu(ip->hw_id) == GC_HWID)
+ adev->gfx.xcc_mask |=
+ (1U << ip->instance_number);
+
+ if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID)
+ wafl_ver = IP_VERSION_FULL(ip->major, ip->minor,
+ ip->revision, 0, 0);
+
+ for (k = 0; k < num_base_address; k++) {
+ /*
+ * convert the endianness of base addresses in place,
+ * so that we don't need to convert them when accessing adev->reg_offset.
+ */
+ if (ihdr->base_addr_64_bit)
+ /* Truncate the 64-bit base address from IP discovery
+ * and only store the lower 32-bit IP base in reg_offset[].
+ * Bits above 32 follow an ASIC-specific format, so just
+ * discard them and handle them within the specific ASIC.
+ * This way reg_offset[] and related helpers can
+ * stay unchanged.
+ * The base address is in dwords, so clear the
+ * highest 2 bits before storing.
+ */
+ ip->base_address[k] =
+ lower_32_bits(le64_to_cpu(ip->base_address_64[k])) & 0x3FFFFFFF;
+ else
+ ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
+ DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
+ }
+
+ for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
+ if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id) &&
+ hw_id_map[hw_ip] != 0) {
+ DRM_DEBUG("set register base offset for %s\n",
+ hw_id_names[le16_to_cpu(ip->hw_id)]);
+ adev->reg_offset[hw_ip][ip->instance_number] =
+ ip->base_address;
+ /* Instance support is somewhat inconsistent.
+ * SDMA is a good example. Sienna cichlid has 4 total
+ * SDMA instances, each enumerated separately (HWIDs
+ * 42, 43, 68, 69). Arcturus has 8 total SDMA instances,
+ * but they are enumerated as multiple instances of the
+ * same HWIDs (4x HWID 42, 4x HWID 43). UMC is another
+ * example. On most chips there are multiple instances
+ * with the same HWID.
+ */
+
+ if (ihdr->version < 3) {
+ subrev = 0;
+ variant = 0;
+ } else {
+ subrev = ip->sub_revision;
+ variant = ip->variant;
+ }
+
+ adev->ip_versions[hw_ip]
+ [ip->instance_number] =
+ IP_VERSION_FULL(ip->major,
+ ip->minor,
+ ip->revision,
+ variant,
+ subrev);
+ }
+ }
+
+next_ip:
+ if (ihdr->base_addr_64_bit)
+ ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
+ else
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
+ }
+ }
+
+ if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0])
+ adev->ip_versions[XGMI_HWIP][0] = wafl_ver;
+
+ return 0;
+}
+
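+/* Determine which VCN/JPEG and UMC instances have been harvested, either
+ * from the per-IP harvest bits (Navi1x and older discovery tables) or from
+ * the dedicated harvest table, and update the harvest masks and UMC count.
+ */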
+static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct ip_discovery_header *ihdr;
+ struct binary_header *bhdr;
+ int vcn_harvest_count = 0;
+ int umc_harvest_count = 0;
+ uint16_t offset, ihdr_ver;
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset);
+ ihdr = (struct ip_discovery_header *)(discovery_bin + offset);
+ ihdr_ver = le16_to_cpu(ihdr->version);
+ /*
+ * The harvest table is not applicable to Navi1x and legacy GPUs,
+ * so read the harvest bit from each IP data structure to set
+ * the harvest configuration.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) &&
+ ihdr_ver <= 2) {
+ if ((adev->pdev->device == 0x731E &&
+ (adev->pdev->revision == 0xC6 ||
+ adev->pdev->revision == 0xC7)) ||
+ (adev->pdev->device == 0x7340 &&
+ adev->pdev->revision == 0xC9) ||
+ (adev->pdev->device == 0x7360 &&
+ adev->pdev->revision == 0xC7))
+ amdgpu_discovery_read_harvest_bit_per_ip(adev,
+ &vcn_harvest_count);
+ } else {
+ amdgpu_discovery_read_from_harvest_table(adev,
+ &vcn_harvest_count,
+ &umc_harvest_count);
+ }
+
+ amdgpu_discovery_harvest_config_quirk(adev);
+
+ if (vcn_harvest_count == adev->vcn.num_vcn_inst) {
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+ adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+ }
+
+ if (umc_harvest_count < adev->gmc.num_umc) {
+ adev->gmc.num_umc -= umc_harvest_count;
+ }
+}
+
+union gc_info {
+ struct gc_info_v1_0 v1;
+ struct gc_info_v1_1 v1_1;
+ struct gc_info_v1_2 v1_2;
+ struct gc_info_v1_3 v1_3;
+ struct gc_info_v2_0 v2;
+ struct gc_info_v2_1 v2_1;
+};
+
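+/* Fill adev->gfx.config and adev->gfx.cu_info from the GC info table in the
+ * discovery binary, handling both the v1.x and v2.x table layouts.
+ */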
+static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ union gc_info *gc_info;
+ u16 offset;
+
+ if (!discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[GC].offset);
+
+ if (!offset)
+ return 0;
+
+ gc_info = (union gc_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(gc_info->v1.header.version_major)) {
+ case 1:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
+ le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v1_1.gc_num_tcp_per_sa);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v1_1.gc_num_sdp_interface);
+ adev->gfx.config.gc_num_tcps = le32_to_cpu(gc_info->v1_1.gc_num_tcps);
+ }
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 2) {
+ adev->gfx.config.gc_num_tcp_per_wpg = le32_to_cpu(gc_info->v1_2.gc_num_tcp_per_wpg);
+ adev->gfx.config.gc_tcp_l1_size = le32_to_cpu(gc_info->v1_2.gc_tcp_l1_size);
+ adev->gfx.config.gc_num_sqc_per_wgp = le32_to_cpu(gc_info->v1_2.gc_num_sqc_per_wgp);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_data_cache_size_per_sqc);
+ adev->gfx.config.gc_gl1c_per_sa = le32_to_cpu(gc_info->v1_2.gc_gl1c_per_sa);
+ adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance);
+ adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu);
+ }
+ if (le16_to_cpu(gc_info->v1.header.version_minor) >= 3) {
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v1_3.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_tcp_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcp_cache_line_size);
+ adev->gfx.config.gc_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_instruction_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_instruction_cache_line_size);
+ adev->gfx.config.gc_scalar_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_scalar_data_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_scalar_data_cache_line_size);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v1_3.gc_tcc_size);
+ adev->gfx.config.gc_tcc_cache_line_size = le32_to_cpu(gc_info->v1_3.gc_tcc_cache_line_size);
+ }
+ break;
+ case 2:
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
+ adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+ if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
+ adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled GC info table %d.%d\n",
+ le16_to_cpu(gc_info->v1.header.version_major),
+ le16_to_cpu(gc_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union mall_info {
+ struct mall_info_v1_0 v1;
+ struct mall_info_v2_0 v2;
+};
+
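+/* Compute the total MALL (last level cache) size from the MALL info table.
+ * For v1 tables the size is accumulated per UMC, honoring the m_s_present
+ * and m_half_use bitmaps; for v2 tables it is mall_size_per_umc * num_umc.
+ */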
+static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ union mall_info *mall_info;
+ u32 u, mall_size_per_umc, m_s_present, half_use;
+ u64 mall_size;
+ u16 offset;
+
+ if (!discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ mall_info = (union mall_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(mall_info->v1.header.version_major)) {
+ case 1:
+ mall_size = 0;
+ mall_size_per_umc = le32_to_cpu(mall_info->v1.mall_size_per_m);
+ m_s_present = le32_to_cpu(mall_info->v1.m_s_present);
+ half_use = le32_to_cpu(mall_info->v1.m_half_use);
+ for (u = 0; u < adev->gmc.num_umc; u++) {
+ if (m_s_present & (1 << u))
+ mall_size += mall_size_per_umc * 2;
+ else if (half_use & (1 << u))
+ mall_size += mall_size_per_umc / 2;
+ else
+ mall_size += mall_size_per_umc;
+ }
+ adev->gmc.mall_size = mall_size;
+ adev->gmc.m_half_use = half_use;
+ break;
+ case 2:
+ mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
+ adev->gmc.mall_size = (uint64_t)mall_size_per_umc * adev->gmc.num_umc;
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled MALL info table %d.%d\n",
+ le16_to_cpu(mall_info->v1.header.version_major),
+ le16_to_cpu(mall_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union vcn_info {
+ struct vcn_info_v1_0 v1;
+};
+
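+/* Read the per-instance VCN codec disable fuse bits from the VCN info
+ * table into adev->vcn.inst[].vcn_codec_disable_mask.
+ */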
+static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct binary_header *bhdr;
+ union vcn_info *vcn_info;
+ u16 offset;
+ int v;
+
+ if (!discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES,
+ * which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES,
+ * but that may change with future GPUs, so keep this
+ * check for defensive purposes.
+ */
+ if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) {
+ dev_err(adev->dev, "invalid vcn instances\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ vcn_info = (union vcn_info *)(discovery_bin + offset);
+
+ switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
+ case 1:
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * so this won't overflow.
+ */
+ for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
+ adev->vcn.inst[v].vcn_codec_disable_mask =
+ le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled VCN info table %d.%d\n",
+ le16_to_cpu(vcn_info->v1.header.version_major),
+ le16_to_cpu(vcn_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union nps_info {
+ struct nps_info_v1_0 v1;
+};
+
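+/* Re-read the binary header and the NPS info table directly from VRAM
+ * (DISCOVERY_TMR_OFFSET from the end of VRAM) and verify the table
+ * checksum; used by amdgpu_discovery_get_nps_info() when a refresh is
+ * requested.
+ */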
+static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,
+ union nps_info *nps_data)
+{
+ uint64_t vram_size, pos, offset;
+ struct nps_info_header *nhdr;
+ struct binary_header bhdr;
+ uint16_t checksum;
+
+ vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
+ pos = vram_size - DISCOVERY_TMR_OFFSET;
+ amdgpu_device_vram_access(adev, pos, &bhdr, sizeof(bhdr), false);
+
+ offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset);
+ checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum);
+
+ amdgpu_device_vram_access(adev, (pos + offset), nps_data,
+ sizeof(*nps_data), false);
+
+ nhdr = (struct nps_info_header *)(nps_data);
+ if (!amdgpu_discovery_verify_checksum((uint8_t *)nps_data,
+ le32_to_cpu(nhdr->size_bytes),
+ checksum)) {
+ dev_err(adev->dev, "nps data refresh, checksum mismatch\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
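+/* Report the NPS (memory partition) type and the memory ranges described
+ * by the NPS info table. The 'ranges' array is kvcalloc'ed here and must be
+ * freed by the caller with kvfree(); when 'refresh' is set the table is
+ * re-read from VRAM instead of the cached discovery binary.
+ */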
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+ uint32_t *nps_type,
+ struct amdgpu_gmc_memrange **ranges,
+ int *range_cnt, bool refresh)
+{
+ uint8_t *discovery_bin = adev->discovery.bin;
+ struct amdgpu_gmc_memrange *mem_ranges;
+ struct binary_header *bhdr;
+ union nps_info *nps_info;
+ union nps_info nps_data;
+ u16 offset;
+ int i, r;
+
+ if (!nps_type || !range_cnt || !ranges)
+ return -EINVAL;
+
+ if (refresh) {
+ r = amdgpu_discovery_refresh_nps_info(adev, &nps_data);
+ if (r)
+ return r;
+ nps_info = &nps_data;
+ } else {
+ if (!discovery_bin) {
+ dev_err(adev->dev,
+ "fetch mem range failed, ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
+
+ if (!offset)
+ return -ENOENT;
+
+ /* If verification fails, return as if NPS table doesn't exist */
+ if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
+ return -ENOENT;
+
+ nps_info = (union nps_info *)(discovery_bin + offset);
+ }
+
+ switch (le16_to_cpu(nps_info->v1.header.version_major)) {
+ case 1:
+ mem_ranges = kvcalloc(nps_info->v1.count,
+ sizeof(*mem_ranges),
+ GFP_KERNEL);
+ if (!mem_ranges)
+ return -ENOMEM;
+ *nps_type = nps_info->v1.nps_type;
+ *range_cnt = nps_info->v1.count;
+ for (i = 0; i < *range_cnt; i++) {
+ mem_ranges[i].base_address =
+ nps_info->v1.instance_info[i].base_address;
+ mem_ranges[i].limit_address =
+ nps_info->v1.instance_info[i].limit_address;
+ mem_ranges[i].nid_mask = -1;
+ mem_ranges[i].flags = 0;
+ }
+ *ranges = mem_ranges;
+ break;
+ default:
+ dev_err(adev->dev, "Unhandled NPS info table %d.%d\n",
+ le16_to_cpu(nps_info->v1.header.version_major),
+ le16_to_cpu(nps_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
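+/* The amdgpu_discovery_set_*_ip_blocks() helpers below map the discovered
+ * IP versions onto the matching driver IP block implementations and add
+ * them to the device with amdgpu_device_ip_block_add().
+ */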
+static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
+{
+ /* Key off the GC IP version to pick the common (SOC) IP block. */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &soc24_common_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add common ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
+{
+ /* use GC or MMHUB IP version */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &gmc_v12_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev, "Failed to add gmc ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 3, 0):
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ break;
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 1):
+ case IP_VERSION(4, 4, 0):
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 3):
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 1):
+ amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block);
+ break;
+ case IP_VERSION(6, 1, 0):
+ amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block);
+ break;
+ case IP_VERSION(7, 0, 0):
+ amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ break;
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 8):
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_8_ip_block);
+ break;
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
+ amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 4):
+ amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block);
+ break;
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 5):
+ amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add psp ip block(MP0_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, MP0_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_ARCTURUS)
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 8):
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 4):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
+ amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
+ break;
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 4):
+ case IP_VERSION(14, 0, 5):
+ amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add smu ip block(MP1_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, MP1_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#if defined(CONFIG_DRM_AMD_DC)
+static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev)
+{
+ amdgpu_device_set_sriov_virtual_display(adev);
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+}
+#endif
+
+static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
+{
+ if (adev->enable_virtual_display) {
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+ return 0;
+ }
+
+ if (!amdgpu_device_has_dc_support(adev))
+ return 0;
+
+#if defined(CONFIG_DRM_AMD_DC)
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ case IP_VERSION(2, 0, 2):
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 3):
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 0, 3):
+ case IP_VERSION(3, 0, 1):
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ case IP_VERSION(3, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(4, 1, 0):
+ /* TODO: Fix IP version. DC code expects version 4.0.1 */
+ if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0))
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 0, 1);
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_discovery_set_sriov_display(adev);
+ else
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add dm ip block(DCE_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, DCE_HWIP, 0));
+ return -EINVAL;
+ }
+ } else if (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, DCI_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ case IP_VERSION(12, 1, 0):
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_discovery_set_sriov_display(adev);
+ else
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add dm ip block(DCI_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, DCI_HWIP, 0));
+ return -EINVAL;
+ }
+ }
+#endif
+ return 0;
+}
+
+static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, GC_HWIP, 0));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 2):
+ case IP_VERSION(4, 4, 0):
+ amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
+ break;
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
+ amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
+ break;
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 4):
+ case IP_VERSION(5, 2, 5):
+ case IP_VERSION(5, 2, 6):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 1):
+ case IP_VERSION(5, 2, 7):
+ amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block);
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_ras_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ amdgpu_device_ip_block_add(adev, &ras_v1_0_ip_block);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
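+/* Multimedia blocks: ASICs that expose a VCE IP version get the discrete
+ * UVD + VCE blocks; everything else gets the combined VCN/JPEG blocks
+ * matching the UVD_HWIP (VCN) version.
+ */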
+static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
+{
+ if (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 2, 0):
+ /* UVD is not supported on vega20 SR-IOV */
+ if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev)))
+ amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add uvd v7 ip block(UVD_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
+ return -EINVAL;
+ }
+ switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 1, 0):
+ /* VCE is not supported on vega20 SR-IOV */
+ if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev)))
+ amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add VCE v4 ip block(VCE_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, VCE_HWIP, 0));
+ return -EINVAL;
+ }
+ } else {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
+ break;
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ case IP_VERSION(2, 2, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
+ break;
+ case IP_VERSION(2, 0, 3):
+ break;
+ case IP_VERSION(2, 5, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_5_ip_block);
+ break;
+ case IP_VERSION(2, 6, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v2_6_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v2_6_ip_block);
+ break;
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 16):
+ case IP_VERSION(3, 1, 1):
+ case IP_VERSION(3, 1, 2):
+ case IP_VERSION(3, 0, 2):
+ amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
+ break;
+ case IP_VERSION(3, 0, 33):
+ amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
+ break;
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 2):
+ case IP_VERSION(4, 0, 4):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
+ break;
+ case IP_VERSION(4, 0, 3):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block);
+ break;
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ amdgpu_device_ip_block_add(adev, &vcn_v4_0_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v4_0_5_ip_block);
+ break;
+ case IP_VERSION(5, 0, 0):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
+ break;
+ case IP_VERSION(5, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
+ break;
+ default:
+ dev_err(adev->dev,
+ "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
+ amdgpu_ip_version(adev, UVD_HWIP, 0));
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_device_ip_block_add(adev, &mes_v12_0_ip_block);
+ adev->enable_mes = true;
+ adev->enable_mes_kiq = true;
+ if (amdgpu_uni_mes)
+ adev->enable_uni_mes = true;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ aqua_vanjaram_init_soc_config(adev);
+ break;
+ default:
+ break;
+ }
+}
+
+static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 3):
+ amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ if (amdgpu_umsch_mm & 0x1) {
+ amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block);
+ adev->enable_umsch_mm = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int amdgpu_discovery_set_isp_ip_blocks(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DRM_AMD_ISP)
+ switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ amdgpu_device_ip_block_add(adev, &isp_v4_1_0_ip_block);
+ break;
+ case IP_VERSION(4, 1, 1):
+ amdgpu_device_ip_block_add(adev, &isp_v4_1_1_ip_block);
+ break;
+ default:
+ break;
+ }
+#endif
+
+ return 0;
+}
+
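+/* Main entry point: determine the IP versions (from the discovery binary,
+ * or from hardcoded tables for pre-discovery ASICs), expose them in sysfs,
+ * derive the chip family and NBIO callbacks, and then add the individual
+ * IP blocks via the helpers above.
+ */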
+int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
+{
+ int r;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(6, 1, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 0, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 0, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 0);
+ break;
+ case CHIP_VEGA12:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 0, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 5, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(6, 2, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(9, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(9, 0, 1);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 0, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 0, 1);
+ break;
+ case CHIP_RAVEN:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ vega10_reg_base_init(adev);
+ adev->sdma.num_instances = 1;
+ adev->sdma.sdma_mask = 1;
+ adev->vcn.num_vcn_inst = 1;
+ adev->gmc.num_umc = 2;
+ adev->gfx.xcc_mask = 1;
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 1, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 1);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 0, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(7, 5, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(10, 1, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(10, 0, 1);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 2);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 1);
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 1);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
+ } else {
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(2, 1, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(7, 0, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(10, 0, 0);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 1, 0);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 0);
+ adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 0);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
+ }
+ break;
+ case CHIP_VEGA20:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ vega20_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gmc.num_umc = 8;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 2, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 0);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 1);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 0);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(7, 2, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(7, 2, 0);
+ adev->ip_versions[VCE_HWIP][0] = IP_VERSION(4, 1, 0);
+ adev->ip_versions[DCI_HWIP][0] = IP_VERSION(12, 1, 0);
+ break;
+ case CHIP_ARCTURUS:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ arct_reg_base_init(adev);
+ adev->sdma.num_instances = 8;
+ adev->sdma.sdma_mask = 0xff;
+ adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 8;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 2, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][0] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][2] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][3] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][4] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][5] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[SDMA1_HWIP][6] = IP_VERSION(4, 2, 2);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 1);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 1, 2);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 4);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 3);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 3);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 1);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 5, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 5, 0);
+ break;
+ case CHIP_ALDEBARAN:
+ /* This is not fatal. We only need the discovery
+ * binary for sysfs. We don't need it for a
+ * functional system.
+ */
+ amdgpu_discovery_init(adev);
+ aldebaran_reg_base_init(adev);
+ adev->sdma.num_instances = 5;
+ adev->sdma.sdma_mask = 0x1f;
+ adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 4;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][1] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][2] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][3] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[SDMA0_HWIP][4] = IP_VERSION(4, 4, 0);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 6, 2);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(7, 4, 4);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(6, 7, 0);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(13, 0, 2);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 4, 2);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 6, 0);
+ adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 6, 0);
+ adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
+ break;
+ case CHIP_CYAN_SKILLFISH:
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r)
+ return -EINVAL;
+
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ } else {
+ cyan_skillfish_reg_base_init(adev);
+ adev->sdma.num_instances = 2;
+ adev->sdma.sdma_mask = 3;
+ adev->gfx.xcc_mask = 1;
+ adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3);
+ adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3);
+ adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[HDP_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(5, 0, 1);
+ adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 5, 0);
+ adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(2, 1, 1);
+ adev->ip_versions[UMC_HWIP][0] = IP_VERSION(8, 1, 1);
+ adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 1);
+ adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 8);
+ adev->ip_versions[GC_HWIP][0] = IP_VERSION(10, 1, 3);
+ adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 0, 3);
+ }
+ break;
+ default:
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r) {
+ drm_err(&adev->ddev, "discovery failed: %d\n", r);
+ return r;
+ }
+
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ break;
+ }
+
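Each adev->ip_versions[] entry packs a major/minor/revision triple into one integer so the switches below can compare IP revisions numerically. A stand-alone sketch of that idea, assuming the classic (major << 16 | minor << 8 | rev) encoding (newer trees add variant/sub-revision bits, so treat the exact shifts as illustrative):

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: assumes a (maj << 16 | min << 8 | rev) packing. */
#define DEMO_IP_VERSION(maj, min, rev) \
	(((uint32_t)(maj) << 16) | ((uint32_t)(min) << 8) | (uint32_t)(rev))
#define DEMO_IP_VERSION_MAJ(ver) (((ver) >> 16) & 0xff)
#define DEMO_IP_VERSION_MIN(ver) (((ver) >> 8) & 0xff)
#define DEMO_IP_VERSION_REV(ver) ((ver) & 0xff)

int main(void)
{
	uint32_t gc = DEMO_IP_VERSION(9, 4, 2);	/* e.g. Aldebaran's GC block */

	printf("packed: 0x%06x -> %u.%u.%u\n", (unsigned)gc,
	       (unsigned)DEMO_IP_VERSION_MAJ(gc),
	       (unsigned)DEMO_IP_VERSION_MIN(gc),
	       (unsigned)DEMO_IP_VERSION_REV(gc));
	/* Because each field is bounded to 8 bits, packed values compare in
	 * version order, which is what the later switch/case and >= checks
	 * on amdgpu_ip_version() rely on. */
	return 0;
}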
+ amdgpu_discovery_init_soc_config(adev);
+ amdgpu_discovery_sysfs_init(adev);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ adev->family = AMDGPU_FAMILY_AI;
+ break;
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ adev->family = AMDGPU_FAMILY_RV;
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->family = AMDGPU_FAMILY_NV;
+ break;
+ case IP_VERSION(10, 3, 1):
+ adev->family = AMDGPU_FAMILY_VGH;
+ adev->apu_flags |= AMD_APU_IS_VANGOGH;
+ break;
+ case IP_VERSION(10, 3, 3):
+ adev->family = AMDGPU_FAMILY_YC;
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->family = AMDGPU_FAMILY_GC_10_3_6;
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->family = AMDGPU_FAMILY_GC_10_3_7;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->family = AMDGPU_FAMILY_GC_11_0_0;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->family = AMDGPU_FAMILY_GC_11_0_1;
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->family = AMDGPU_FAMILY_GC_11_5_0;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->family = AMDGPU_FAMILY_GC_12_0_0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->flags |= AMD_IS_APU;
+ break;
+ default:
+ break;
+ }
+
+ /* set NBIO version */
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 2, 0):
+ adev->nbio.funcs = &nbio_v6_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ case IP_VERSION(2, 5, 0):
+ adev->nbio.funcs = &nbio_v7_0_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_0_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 4, 0):
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
+ adev->nbio.funcs = &nbio_v7_4_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
+ adev->nbio.funcs = &nbio_v7_9_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
+ adev->nbio.funcs = &nbio_v7_11_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 2, 0):
+ case IP_VERSION(7, 2, 1):
+ case IP_VERSION(7, 3, 0):
+ case IP_VERSION(7, 5, 0):
+ case IP_VERSION(7, 5, 1):
+ adev->nbio.funcs = &nbio_v7_2_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg;
+ break;
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 3, 0):
+ case IP_VERSION(2, 3, 1):
+ case IP_VERSION(2, 3, 2):
+ case IP_VERSION(3, 3, 0):
+ case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
+ case IP_VERSION(3, 3, 3):
+ adev->nbio.funcs = &nbio_v2_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
+ break;
+ case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
+ if (amdgpu_sriov_vf(adev))
+ adev->nbio.funcs = &nbio_v4_3_sriov_funcs;
+ else
+ adev->nbio.funcs = &nbio_v4_3_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
+ break;
+ case IP_VERSION(7, 7, 0):
+ case IP_VERSION(7, 7, 1):
+ adev->nbio.funcs = &nbio_v7_7_funcs;
+ adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
+ break;
+ case IP_VERSION(6, 3, 1):
+ adev->nbio.funcs = &nbif_v6_3_1_funcs;
+ adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 1):
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ case IP_VERSION(4, 1, 2):
+ case IP_VERSION(4, 2, 0):
+ case IP_VERSION(4, 2, 1):
+ case IP_VERSION(4, 4, 0):
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ adev->hdp.funcs = &hdp_v4_0_funcs;
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 3):
+ case IP_VERSION(5, 0, 4):
+ case IP_VERSION(5, 2, 0):
+ adev->hdp.funcs = &hdp_v5_0_funcs;
+ break;
+ case IP_VERSION(5, 2, 1):
+ adev->hdp.funcs = &hdp_v5_2_funcs;
+ break;
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 1, 0):
+ adev->hdp.funcs = &hdp_v6_0_funcs;
+ break;
+ case IP_VERSION(7, 0, 0):
+ adev->hdp.funcs = &hdp_v7_0_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, DF_HWIP, 0)) {
+ case IP_VERSION(3, 6, 0):
+ case IP_VERSION(3, 6, 1):
+ case IP_VERSION(3, 6, 2):
+ adev->df.funcs = &df_v3_6_funcs;
+ break;
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 5, 0):
+ case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 5, 2):
+ adev->df.funcs = &df_v1_7_funcs;
+ break;
+ case IP_VERSION(4, 3, 0):
+ adev->df.funcs = &df_v4_3_funcs;
+ break;
+ case IP_VERSION(4, 6, 2):
+ adev->df.funcs = &df_v4_6_2_funcs;
+ break;
+ case IP_VERSION(4, 15, 0):
+ case IP_VERSION(4, 15, 1):
+ adev->df.funcs = &df_v4_15_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, SMUIO_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(10, 0, 2):
+ adev->smuio.funcs = &smuio_v9_0_funcs;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 8):
+ adev->smuio.funcs = &smuio_v11_0_funcs;
+ break;
+ case IP_VERSION(11, 0, 6):
+ case IP_VERSION(11, 0, 10):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 9):
+ case IP_VERSION(13, 0, 10):
+ adev->smuio.funcs = &smuio_v11_0_6_funcs;
+ break;
+ case IP_VERSION(13, 0, 2):
+ adev->smuio.funcs = &smuio_v13_0_funcs;
+ break;
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 11):
+ adev->smuio.funcs = &smuio_v13_0_3_funcs;
+ if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
+ adev->flags |= AMD_IS_APU;
+ }
+ break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ adev->smuio.funcs = &smuio_v13_0_6_funcs;
+ break;
+ case IP_VERSION(14, 0, 2):
+ adev->smuio.funcs = &smuio_v14_0_2_funcs;
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ adev->lsdma.funcs = &lsdma_v6_0_funcs;
+ break;
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ adev->lsdma.funcs = &lsdma_v7_0_funcs;
+ break;
+ default:
+ break;
+ }
+
+ r = amdgpu_discovery_set_common_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_gmc_ip_blocks(adev);
+ if (r)
+ return r;
+
+ /* For SR-IOV, PSP needs to be initialized before IH */
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_discovery_set_psp_ip_blocks(adev);
+ if (r)
+ return r;
+ r = amdgpu_discovery_set_ih_ip_blocks(adev);
+ if (r)
+ return r;
+ } else {
+ r = amdgpu_discovery_set_ih_ip_blocks(adev);
+ if (r)
+ return r;
+
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = amdgpu_discovery_set_psp_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+ }
+
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = amdgpu_discovery_set_smu_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_discovery_set_display_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_gc_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_sdma_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_ras_ip_blocks(adev);
+ if (r)
+ return r;
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
+ !amdgpu_sriov_vf(adev)) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
+ r = amdgpu_discovery_set_smu_ip_blocks(adev);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_discovery_set_mm_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_mes_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_vpe_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_umsch_mm_ip_blocks(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_discovery_set_isp_ip_blocks(adev);
+ if (r)
+ return r;
+ return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
new file mode 100644
index 000000000000..4ce04486cc31
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DISCOVERY__
+#define __AMDGPU_DISCOVERY__
+
+#include <linux/debugfs.h>
+
+#define DISCOVERY_TMR_SIZE (10 << 10)
+#define DISCOVERY_TMR_OFFSET (64 << 10)
+
+struct ip_discovery_top;
+
+struct amdgpu_discovery_info {
+ struct debugfs_blob_wrapper debugfs_blob;
+ struct ip_discovery_top *ip_top;
+ uint32_t size;
+ uint8_t *bin;
+ bool reserve_tmr;
+};
+
+void amdgpu_discovery_fini(struct amdgpu_device *adev);
+int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
+
+int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
+ uint32_t *nps_type,
+ struct amdgpu_gmc_memrange **ranges,
+ int *range_cnt, bool refresh);
+
+#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
new file mode 100644
index 000000000000..b5d34797d606
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -0,0 +1,1928 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_i2c.h"
+#include "atom.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "soc15_common.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "bif/bif_4_1_d.h"
+#include <asm/div64.h>
+
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_vblank.h>
+
+/**
+ * amdgpu_display_hotplug_work_func - work handler for display hotplug event
+ *
+ * @work: work struct pointer
+ *
+ * This is the hotplug event work handler (all ASICs).
+ * The work gets scheduled from the IRQ handler if there
+ * was a hotplug interrupt. It walks through the connector table
+ * and calls hotplug handler for each connector. After this, it sends
+ * a DRM hotplug event to alert userspace.
+ *
+ * This design approach is required in order to defer hotplug event handling
+ * from the IRQ handler to a work handler because the hotplug handler has to
+ * use mutexes, which cannot be locked in an IRQ handler (since &mutex_lock
+ * may sleep).
+ */
+void amdgpu_display_hotplug_work_func(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ hotplug_work.work);
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_mode_config *mode_config = &dev->mode_config;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+
+ mutex_lock(&mode_config->mutex);
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ amdgpu_connector_hotplug(connector);
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&mode_config->mutex);
+ /* Just fire off a uevent and let userspace tell us what to do */
+ drm_helper_hpd_irq_event(dev);
+}
+
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj);
+
+static void amdgpu_display_flip_callback(struct dma_fence *f,
+ struct dma_fence_cb *cb)
+{
+ struct amdgpu_flip_work *work =
+ container_of(cb, struct amdgpu_flip_work, cb);
+
+ dma_fence_put(f);
+ schedule_work(&work->flip_work.work);
+}
+
+static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
+ struct dma_fence **f)
+{
+ struct dma_fence *fence = *f;
+
+ if (fence == NULL)
+ return false;
+
+ *f = NULL;
+
+ if (!dma_fence_add_callback(fence, &work->cb,
+ amdgpu_display_flip_callback))
+ return true;
+
+ dma_fence_put(fence);
+ return false;
+}
+
+static void amdgpu_display_flip_work_func(struct work_struct *__work)
+{
+ struct delayed_work *delayed_work =
+ container_of(__work, struct delayed_work, work);
+ struct amdgpu_flip_work *work =
+ container_of(delayed_work, struct amdgpu_flip_work, flip_work);
+ struct amdgpu_device *adev = work->adev;
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[work->crtc_id];
+
+ struct drm_crtc *crtc = &amdgpu_crtc->base;
+ unsigned long flags;
+ unsigned int i;
+ int vpos, hpos;
+
+ for (i = 0; i < work->shared_count; ++i)
+ if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
+ return;
+
+ /* Wait until we're out of the vertical blank period before the one
+ * targeted by the flip
+ */
+ if (amdgpu_crtc->enabled &&
+ (amdgpu_display_get_crtc_scanoutpos(adev_to_drm(adev), work->crtc_id, 0,
+ &vpos, &hpos, NULL, NULL,
+ &crtc->hwmode)
+ & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
+ (int)(work->target_vblank -
+ amdgpu_get_vblank_counter_kms(crtc)) > 0) {
+ schedule_delayed_work(&work->flip_work, usecs_to_jiffies(1000));
+ return;
+ }
+
+ /* We borrow the event spin lock for protecting flip_status */
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ /* Do the flip (mmio) */
+ adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base, work->async);
+
+ /* Set the flip status */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+
+ drm_dbg_vbl(adev_to_drm(adev),
+ "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, work);
+
+}
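The retry condition in amdgpu_display_flip_work_func() above leans on unsigned wrap-around: subtracting the current vblank counter from the target and casting the difference to a signed int stays correct across a 32-bit counter overflow. A minimal user-space sketch of that comparison (counter values invented for the demo):

#include <stdio.h>
#include <stdint.h>

/* Nonzero while 'target' is still in the future relative to 'now',
 * even across a 32-bit counter wrap. */
static int vblank_still_pending(uint32_t target, uint32_t now)
{
	return (int32_t)(target - now) > 0;
}

int main(void)
{
	/* Plain case: target 105, counter at 100 -> still pending (1). */
	printf("%d\n", vblank_still_pending(105, 100));
	/* Wrap case: target set just before the wrap, counter not yet
	 * wrapped -> still pending (1). */
	printf("%d\n", vblank_still_pending(5, 0xFFFFFFF0u));
	/* Counter wrapped past an old target -> already reached (0). */
	printf("%d\n", vblank_still_pending(0xFFFFFFF0u, 5));
	return 0;
}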
+
+/*
+ * Handle unpin events outside the interrupt handler proper.
+ */
+static void amdgpu_display_unpin_work_func(struct work_struct *__work)
+{
+ struct amdgpu_flip_work *work =
+ container_of(__work, struct amdgpu_flip_work, unpin_work);
+ int r;
+
+ /* unpin of the old buffer */
+ r = amdgpu_bo_reserve(work->old_abo, true);
+ if (likely(r == 0)) {
+ amdgpu_bo_unpin(work->old_abo);
+ amdgpu_bo_unreserve(work->old_abo);
+ } else
+ DRM_ERROR("failed to reserve buffer after flip\n");
+
+ amdgpu_bo_unref(&work->old_abo);
+ kfree(work->shared);
+ kfree(work);
+}
+
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t page_flip_flags, uint32_t target,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_flip_work *work;
+ struct amdgpu_bo *new_abo;
+ unsigned long flags;
+ u64 tiling_flags;
+ int i, r;
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (work == NULL)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&work->flip_work, amdgpu_display_flip_work_func);
+ INIT_WORK(&work->unpin_work, amdgpu_display_unpin_work_func);
+
+ work->event = event;
+ work->adev = adev;
+ work->crtc_id = amdgpu_crtc->crtc_id;
+ work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
+
+ /* schedule unpin of the old buffer */
+ obj = crtc->primary->fb->obj[0];
+
+ /* take a reference to the old object */
+ work->old_abo = gem_to_amdgpu_bo(obj);
+ amdgpu_bo_ref(work->old_abo);
+
+ obj = fb->obj[0];
+ new_abo = gem_to_amdgpu_bo(obj);
+
+ /* pin the new buffer */
+ r = amdgpu_bo_reserve(new_abo, false);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("failed to reserve new abo buffer before flip\n");
+ goto cleanup;
+ }
+
+ if (!adev->enable_virtual_display) {
+ new_abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(new_abo,
+ amdgpu_display_supported_domains(adev, new_abo->flags));
+ if (unlikely(r != 0)) {
+ DRM_ERROR("failed to pin new abo buffer before flip\n");
+ goto unreserve;
+ }
+ }
+
+ r = amdgpu_ttm_alloc_gart(&new_abo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", new_abo);
+ goto unpin;
+ }
+
+ r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+ &work->shared_count,
+ &work->shared);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("failed to get fences for buffer\n");
+ goto unpin;
+ }
+
+ amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags);
+ amdgpu_bo_unreserve(new_abo);
+
+ if (!adev->enable_virtual_display)
+ work->base = amdgpu_bo_gpu_offset(new_abo);
+ work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
+ amdgpu_get_vblank_counter_kms(crtc);
+
+ /* We borrow the event spin lock for protecting flip_work */
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_NONE) {
+ DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ r = -EBUSY;
+ goto pflip_cleanup;
+ }
+
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING;
+ amdgpu_crtc->pflip_works = work;
+
+
+ DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, work);
+ /* update crtc fb */
+ crtc->primary->fb = fb;
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ amdgpu_display_flip_work_func(&work->flip_work.work);
+ return 0;
+
+pflip_cleanup:
+ if (unlikely(amdgpu_bo_reserve(new_abo, false) != 0)) {
+ DRM_ERROR("failed to reserve new abo in error path\n");
+ goto cleanup;
+ }
+unpin:
+ if (!adev->enable_virtual_display)
+ amdgpu_bo_unpin(new_abo);
+
+unreserve:
+ amdgpu_bo_unreserve(new_abo);
+
+cleanup:
+ amdgpu_bo_unref(&work->old_abo);
+ for (i = 0; i < work->shared_count; ++i)
+ dma_fence_put(work->shared[i]);
+ kfree(work->shared);
+ kfree(work);
+
+ return r;
+}
+
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ struct drm_device *dev;
+ struct amdgpu_device *adev;
+ struct drm_crtc *crtc;
+ bool active = false;
+ int ret;
+
+ if (!set || !set->crtc)
+ return -EINVAL;
+
+ dev = set->crtc->dev;
+
+ ret = pm_runtime_get_sync(dev->dev);
+ if (ret < 0)
+ goto out;
+
+ ret = drm_crtc_helper_set_config(set, ctx);
+
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+ if (crtc->enabled)
+ active = true;
+
+ adev = drm_to_adev(dev);
+ /* if we have active crtcs and we don't have a power ref,
+ * take the current one
+ */
+ if (active && !adev->have_disp_power_ref) {
+ adev->have_disp_power_ref = true;
+ return ret;
+ }
+ /* if we have no active crtcs, then drop the
+ * power ref we got before
+ */
+ if (!active && adev->have_disp_power_ref)
+ adev->have_disp_power_ref = false;
+out:
+ /* drop the power reference we got coming in here */
+ pm_runtime_put_autosuspend(dev->dev);
+ return ret;
+}
+
+static const char *encoder_names[41] = {
+ "NONE",
+ "INTERNAL_LVDS",
+ "INTERNAL_TMDS1",
+ "INTERNAL_TMDS2",
+ "INTERNAL_DAC1",
+ "INTERNAL_DAC2",
+ "INTERNAL_SDVOA",
+ "INTERNAL_SDVOB",
+ "SI170B",
+ "CH7303",
+ "CH7301",
+ "INTERNAL_DVO1",
+ "EXTERNAL_SDVOA",
+ "EXTERNAL_SDVOB",
+ "TITFP513",
+ "INTERNAL_LVTM1",
+ "VT1623",
+ "HDMI_SI1930",
+ "HDMI_INTERNAL",
+ "INTERNAL_KLDSCP_TMDS1",
+ "INTERNAL_KLDSCP_DVO1",
+ "INTERNAL_KLDSCP_DAC1",
+ "INTERNAL_KLDSCP_DAC2",
+ "SI178",
+ "MVPU_FPGA",
+ "INTERNAL_DDI",
+ "VT1625",
+ "HDMI_SI1932",
+ "DP_AN9801",
+ "DP_DP501",
+ "INTERNAL_UNIPHY",
+ "INTERNAL_KLDSCP_LVTMA",
+ "INTERNAL_UNIPHY1",
+ "INTERNAL_UNIPHY2",
+ "NUTMEG",
+ "TRAVIS",
+ "INTERNAL_VCE",
+ "INTERNAL_UNIPHY3",
+ "HDMI_ANX9805",
+ "INTERNAL_AMCLK",
+ "VIRTUAL",
+};
+
+static const char *hpd_names[6] = {
+ "HPD1",
+ "HPD2",
+ "HPD3",
+ "HPD4",
+ "HPD5",
+ "HPD6",
+};
+
+void amdgpu_display_print_display_setup(struct drm_device *dev)
+{
+ struct drm_connector *connector;
+ struct amdgpu_connector *amdgpu_connector;
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+ struct drm_connector_list_iter iter;
+ uint32_t devices;
+ int i = 0;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ DRM_INFO("AMDGPU Display Connectors\n");
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ DRM_INFO("Connector %d:\n", i);
+ DRM_INFO(" %s\n", connector->name);
+ if (amdgpu_connector->hpd.hpd != AMDGPU_HPD_NONE)
+ DRM_INFO(" %s\n", hpd_names[amdgpu_connector->hpd.hpd]);
+ if (amdgpu_connector->ddc_bus) {
+ DRM_INFO(" DDC: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+ amdgpu_connector->ddc_bus->rec.mask_clk_reg,
+ amdgpu_connector->ddc_bus->rec.mask_data_reg,
+ amdgpu_connector->ddc_bus->rec.a_clk_reg,
+ amdgpu_connector->ddc_bus->rec.a_data_reg,
+ amdgpu_connector->ddc_bus->rec.en_clk_reg,
+ amdgpu_connector->ddc_bus->rec.en_data_reg,
+ amdgpu_connector->ddc_bus->rec.y_clk_reg,
+ amdgpu_connector->ddc_bus->rec.y_data_reg);
+ if (amdgpu_connector->router.ddc_valid)
+ DRM_INFO(" DDC Router 0x%x/0x%x\n",
+ amdgpu_connector->router.ddc_mux_control_pin,
+ amdgpu_connector->router.ddc_mux_state);
+ if (amdgpu_connector->router.cd_valid)
+ DRM_INFO(" Clock/Data Router 0x%x/0x%x\n",
+ amdgpu_connector->router.cd_mux_control_pin,
+ amdgpu_connector->router.cd_mux_state);
+ } else {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_VGA ||
+ connector->connector_type == DRM_MODE_CONNECTOR_DVII ||
+ connector->connector_type == DRM_MODE_CONNECTOR_DVID ||
+ connector->connector_type == DRM_MODE_CONNECTOR_DVIA ||
+ connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+ connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ DRM_INFO(" DDC: no ddc bus - possible BIOS bug - please report to xorg-driver-ati@lists.x.org\n");
+ }
+ DRM_INFO(" Encoders:\n");
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ devices = amdgpu_encoder->devices & amdgpu_connector->devices;
+ if (devices) {
+ if (devices & ATOM_DEVICE_CRT1_SUPPORT)
+ DRM_INFO(" CRT1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_CRT2_SUPPORT)
+ DRM_INFO(" CRT2: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_LCD1_SUPPORT)
+ DRM_INFO(" LCD1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP1_SUPPORT)
+ DRM_INFO(" DFP1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP2_SUPPORT)
+ DRM_INFO(" DFP2: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP3_SUPPORT)
+ DRM_INFO(" DFP3: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP4_SUPPORT)
+ DRM_INFO(" DFP4: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP5_SUPPORT)
+ DRM_INFO(" DFP5: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_DFP6_SUPPORT)
+ DRM_INFO(" DFP6: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_TV1_SUPPORT)
+ DRM_INFO(" TV1: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ if (devices & ATOM_DEVICE_CV_SUPPORT)
+ DRM_INFO(" CV: %s\n", encoder_names[amdgpu_encoder->encoder_id]);
+ }
+ }
+ i++;
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+ bool use_aux)
+{
+ u8 out = 0x0;
+ u8 buf[8];
+ int ret;
+ struct i2c_msg msgs[] = {
+ {
+ .addr = DDC_ADDR,
+ .flags = 0,
+ .len = 1,
+ .buf = &out,
+ },
+ {
+ .addr = DDC_ADDR,
+ .flags = I2C_M_RD,
+ .len = 8,
+ .buf = buf,
+ }
+ };
+
+ /* on hw with routers, select right port */
+ if (amdgpu_connector->router.ddc_valid)
+ amdgpu_i2c_router_select_ddc_port(amdgpu_connector);
+
+ if (use_aux)
+ ret = i2c_transfer(&amdgpu_connector->ddc_bus->aux.ddc, msgs, 2);
+ else
+ ret = i2c_transfer(&amdgpu_connector->ddc_bus->adapter, msgs, 2);
+
+ if (ret != 2)
+ /* Couldn't find an accessible DDC on this connector */
+ return false;
+ /* Probe also for valid EDID header
+ * EDID header starts with:
+ * 0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00.
+ * Only the first 6 bytes must be valid as
+ * drm_edid_block_valid() can fix the last 2 bytes
+ */
+ if (drm_edid_header_is_valid(buf) < 6) {
+ /* Couldn't find an accessible EDID on this
+ * connector
+ */
+ return false;
+ }
+ return true;
+}
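drm_edid_header_is_valid() returns how many of the eight fixed EDID header bytes (00 FF FF FF FF FF FF 00) match, and the probe above accepts a score of 6 or better because the last two bytes can be repaired later. A stand-alone sketch of that scoring idea (my own helper, not the DRM implementation):

#include <stdio.h>
#include <stdint.h>

/* Count how many of the first 8 bytes match the canonical EDID header.
 * Illustrative only; the kernel uses drm_edid_header_is_valid(). */
static int edid_header_score(const uint8_t *buf)
{
	static const uint8_t header[8] = {
		0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00
	};
	int i, score = 0;

	for (i = 0; i < 8; i++)
		if (buf[i] == header[i])
			score++;
	return score;
}

int main(void)
{
	/* Perfect header vs. one with the last byte corrupted. */
	uint8_t good[8] = { 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00 };
	uint8_t worn[8] = { 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x42 };

	printf("good: %d, worn: %d (probe accepts score >= 6)\n",
	       edid_header_score(good), edid_header_score(worn));
	return 0;
}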
+
+static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file,
+ unsigned int flags, unsigned int color,
+ struct drm_clip_rect *clips, unsigned int num_clips)
+{
+
+ if (file)
+ return -ENOSYS;
+
+ return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips,
+ num_clips);
+}
+
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+};
+
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+ .dirty = amdgpu_dirtyfb
+};
+
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+ uint64_t bo_flags)
+{
+ uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+#if defined(CONFIG_DRM_AMD_DC)
+ /*
+ * If amdgpu_bo_support_uswc returns false, it means that USWC mappings
+ * are not supported for this board. But this mapping is required
+ * to avoid a hang caused by placing the scanout BO in GTT on certain
+ * APUs. So force the BO placement to VRAM in case this architecture
+ * will not allow USWC mappings.
+ * Also, don't allow the GTT domain if the BO doesn't have the USWC flag set.
+ */
+ if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
+ amdgpu_bo_support_uswc(bo_flags) &&
+ adev->dc_enabled &&
+ adev->mode_info.gpu_vm_support)
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
+#endif
+
+ return domain;
+}
+
+static const struct drm_format_info dcc_formats[] = {
+ { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 2,
+ .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 2,
+ .cpp = { 2, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+};
+
+static const struct drm_format_info dcc_retile_formats[] = {
+ { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+ { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 3,
+ .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1,
+ .has_alpha = true, },
+ { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 3,
+ .cpp = { 2, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, },
+};
+
+static const struct drm_format_info *
+lookup_format_info(const struct drm_format_info formats[],
+ int num_formats, u32 format)
+{
+ int i;
+
+ for (i = 0; i < num_formats; i++) {
+ if (formats[i].format == format)
+ return &formats[i];
+ }
+
+ return NULL;
+}
+
+const struct drm_format_info *
+amdgpu_lookup_format_info(u32 format, uint64_t modifier)
+{
+ if (!IS_AMD_FMT_MOD(modifier))
+ return NULL;
+
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) < AMD_FMT_MOD_TILE_VER_GFX9 ||
+ AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12)
+ return NULL;
+
+ if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
+ return lookup_format_info(dcc_retile_formats,
+ ARRAY_SIZE(dcc_retile_formats),
+ format);
+
+ if (AMD_FMT_MOD_GET(DCC, modifier))
+ return lookup_format_info(dcc_formats, ARRAY_SIZE(dcc_formats),
+ format);
+
+ /* returning NULL will cause the default format structs to be used. */
+ return NULL;
+}
+
+
+/*
+ * Tries to extract the renderable DCC offset from the opaque metadata attached
+ * to the buffer.
+ */
+static int
+extract_render_dcc_offset(struct amdgpu_device *adev,
+ struct drm_gem_object *obj,
+ uint64_t *offset)
+{
+ struct amdgpu_bo *rbo;
+ int r = 0;
+ uint32_t metadata[10]; /* Something that fits a descriptor + header. */
+ uint32_t size;
+
+ rbo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(rbo, false);
+
+ if (unlikely(r)) {
+ /* Don't show error message when returning -ERESTARTSYS */
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Unable to reserve buffer: %d\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_get_metadata(rbo, metadata, sizeof(metadata), &size, NULL);
+ amdgpu_bo_unreserve(rbo);
+
+ if (r)
+ return r;
+
+ /*
+ * The first word is the metadata version, and we need space for at least
+ * the version + pci vendor+device id + 8 words for a descriptor.
+ */
+ if (size < 40 || metadata[0] != 1)
+ return -EINVAL;
+
+ if (adev->family >= AMDGPU_FAMILY_NV) {
+ /* resource word 6/7 META_DATA_ADDRESS{_LO} */
+ *offset = ((u64)metadata[9] << 16u) |
+ ((metadata[8] & 0xFF000000u) >> 16);
+ } else {
+ /* resource word 5/7 META_DATA_ADDRESS */
+ *offset = ((u64)metadata[9] << 8u) |
+ ((u64)(metadata[7] & 0x1FE0000u) << 23);
+ }
+
+ return 0;
+}
+
+static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb)
+{
+ u64 modifier = 0;
+ int swizzle_mode = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE);
+
+ if (!swizzle_mode) {
+ modifier = DRM_FORMAT_MOD_LINEAR;
+ } else {
+ int max_comp_block =
+ AMDGPU_TILING_GET(afb->tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+
+ modifier =
+ AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX12) |
+ AMD_FMT_MOD_SET(TILE, swizzle_mode) |
+ AMD_FMT_MOD_SET(DCC, afb->gfx12_dcc) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_comp_block);
+ }
+
+ afb->base.modifier = modifier;
+ afb->base.flags |= DRM_MODE_FB_MODIFIERS;
+ return 0;
+}
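AMD_FMT_MOD_SET() shifts each field into its slot of the 64-bit modifier and the results are OR-ed together, which is all the gfx12 conversion above does. A sketch of that packing style with invented field positions (the real shifts and masks live in include/uapi/drm/drm_fourcc.h):

#include <stdio.h>
#include <stdint.h>

/* Hypothetical field layout, NOT the real AMD_FMT_MOD bit positions. */
#define DEMO_MOD_SET(shift, mask, val) (((uint64_t)(val) & (mask)) << (shift))

#define DEMO_TILE_SHIFT			0
#define DEMO_TILE_MASK			0x1f
#define DEMO_TILE_VERSION_SHIFT		8
#define DEMO_TILE_VERSION_MASK		0xff
#define DEMO_DCC_SHIFT			16
#define DEMO_DCC_MASK			0x1

int main(void)
{
	/* Pack a swizzle mode, a tile version and a DCC flag, the same way
	 * convert_tiling_flags_to_modifier_gfx12() ORs AMD_FMT_MOD_SET()
	 * results together. */
	uint64_t modifier =
		DEMO_MOD_SET(DEMO_TILE_SHIFT, DEMO_TILE_MASK, 3) |
		DEMO_MOD_SET(DEMO_TILE_VERSION_SHIFT, DEMO_TILE_VERSION_MASK, 5) |
		DEMO_MOD_SET(DEMO_DCC_SHIFT, DEMO_DCC_MASK, 1);

	printf("modifier = 0x%016llx\n", (unsigned long long)modifier);
	return 0;
}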
+
+static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
+{
+ struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
+ uint64_t modifier = 0;
+ int num_pipes = 0;
+ int num_pkrs = 0;
+
+ num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
+ num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) {
+ modifier = DRM_FORMAT_MOD_LINEAR;
+ } else {
+ int swizzle = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE);
+ bool has_xor = swizzle >= 16;
+ int block_size_bits;
+ int version;
+ int pipe_xor_bits = 0;
+ int bank_xor_bits = 0;
+ int packers = 0;
+ int rb = 0;
+ int pipes = ilog2(num_pipes);
+ uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
+
+ switch (swizzle >> 2) {
+ case 0: /* 256B */
+ block_size_bits = 8;
+ break;
+ case 1: /* 4KiB */
+ case 5: /* 4KiB _X */
+ block_size_bits = 12;
+ break;
+ case 2: /* 64KiB */
+ case 4: /* 64 KiB _T */
+ case 6: /* 64 KiB _X */
+ block_size_bits = 16;
+ break;
+ case 7: /* 256 KiB */
+ block_size_bits = 18;
+ break;
+ default:
+ /* RESERVED or VAR */
+ return -EINVAL;
+ }
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX11;
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(10, 3, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
+ else if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(10, 0, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX10;
+ else
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+
+ switch (swizzle & 3) {
+ case 0: /* Z microtiling */
+ return -EINVAL;
+ case 1: /* S microtiling */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0)) {
+ if (!has_xor)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
+ break;
+ case 2:
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0)) {
+ if (!has_xor && afb->base.format->cpp[0] != 4)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
+ break;
+ case 3:
+ break;
+ }
+
+ if (has_xor) {
+ if (num_pipes == num_pkrs && num_pkrs == 0) {
+ DRM_ERROR("invalid number of pipes and packers\n");
+ return -EINVAL;
+ }
+
+ switch (version) {
+ case AMD_FMT_MOD_TILE_VER_GFX11:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
+ break;
+ case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ packers = min(block_size_bits - 8 - pipe_xor_bits,
+ ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs));
+ break;
+ case AMD_FMT_MOD_TILE_VER_GFX10:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ break;
+ case AMD_FMT_MOD_TILE_VER_GFX9:
+ rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) +
+ ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se);
+ pipe_xor_bits = min(block_size_bits - 8, pipes +
+ ilog2(adev->gfx.config.gb_addr_config_fields.num_se));
+ bank_xor_bits = min(block_size_bits - 8 - pipe_xor_bits,
+ ilog2(adev->gfx.config.gb_addr_config_fields.num_banks));
+ break;
+ }
+ }
+
+ modifier = AMD_FMT_MOD |
+ AMD_FMT_MOD_SET(TILE, AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) |
+ AMD_FMT_MOD_SET(TILE_VERSION, version) |
+ AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
+ AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
+ AMD_FMT_MOD_SET(PACKERS, packers);
+
+ if (dcc_offset != 0) {
+ bool dcc_i64b = AMDGPU_TILING_GET(afb->tiling_flags, DCC_INDEPENDENT_64B) != 0;
+ bool dcc_i128b = version >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
+ const struct drm_format_info *format_info;
+ u64 render_dcc_offset;
+
+ /* Enable constant encode on RAVEN2 and later. */
+ bool dcc_constant_encode =
+ (adev->asic_type > CHIP_RAVEN ||
+ (adev->asic_type == CHIP_RAVEN &&
+ adev->external_rev_id >= 0x81)) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(11, 0, 0);
+
+ int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B :
+ dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B :
+ AMD_FMT_MOD_DCC_BLOCK_256B;
+
+ modifier |= AMD_FMT_MOD_SET(DCC, 1) |
+ AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, dcc_constant_encode) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, dcc_i64b) |
+ AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, dcc_i128b) |
+ AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_cblock_size);
+
+ afb->base.offsets[1] = dcc_offset * 256 + afb->base.offsets[0];
+ afb->base.pitches[1] =
+ AMDGPU_TILING_GET(afb->tiling_flags, DCC_PITCH_MAX) + 1;
+
+ /*
+ * If the userspace driver uses retiling, the tiling flags do not contain
+ * info on the renderable DCC buffer. Luckily the opaque metadata contains
+ * that info, so we can try to extract it. The kernel does not use this info
+ * itself, but we should convert it to a modifier plane for getfb2, so the
+ * userspace driver that gets it doesn't have to juggle around another DCC
+ * plane internally.
+ */
+ if (extract_render_dcc_offset(adev, afb->base.obj[0],
+ &render_dcc_offset) == 0 &&
+ render_dcc_offset != 0 &&
+ render_dcc_offset != afb->base.offsets[1] &&
+ render_dcc_offset < UINT_MAX) {
+ uint32_t dcc_block_bits; /* of base surface data */
+
+ modifier |= AMD_FMT_MOD_SET(DCC_RETILE, 1);
+ afb->base.offsets[2] = render_dcc_offset;
+
+ if (adev->family >= AMDGPU_FAMILY_NV) {
+ int extra_pipe = 0;
+
+ if ((amdgpu_ip_version(adev, GC_HWIP,
+ 0) >=
+ IP_VERSION(10, 3, 0)) &&
+ pipes == packers && pipes > 1)
+ extra_pipe = 1;
+
+ dcc_block_bits = max(20, 16 + pipes + extra_pipe);
+ } else {
+ modifier |= AMD_FMT_MOD_SET(RB, rb) |
+ AMD_FMT_MOD_SET(PIPE, pipes);
+ dcc_block_bits = max(20, 18 + rb);
+ }
+
+ dcc_block_bits -= ilog2(afb->base.format->cpp[0]);
+ afb->base.pitches[2] = ALIGN(afb->base.width,
+ 1u << ((dcc_block_bits + 1) / 2));
+ }
+ format_info = amdgpu_lookup_format_info(afb->base.format->format,
+ modifier);
+ if (!format_info)
+ return -EINVAL;
+
+ afb->base.format = format_info;
+ }
+ }
+
+ afb->base.modifier = modifier;
+ afb->base.flags |= DRM_MODE_FB_MODIFIERS;
+ return 0;
+}
+
+/* Mirrors the is_displayable check in radeonsi's gfx6_compute_surface */
+static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb)
+{
+ u64 micro_tile_mode;
+
+ if (AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) == 1) /* LINEAR_ALIGNED */
+ return 0;
+
+ micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE);
+ switch (micro_tile_mode) {
+ case 0: /* DISPLAY */
+ case 3: /* RENDER */
+ return 0;
+ default:
+ drm_dbg_kms(afb->base.dev,
+ "Micro tile mode %llu not supported for scanout\n",
+ micro_tile_mode);
+ return -EINVAL;
+ }
+}
+
+static void get_block_dimensions(unsigned int block_log2, unsigned int cpp,
+ unsigned int *width, unsigned int *height)
+{
+ unsigned int cpp_log2 = ilog2(cpp);
+ unsigned int pixel_log2 = block_log2 - cpp_log2;
+ unsigned int width_log2 = (pixel_log2 + 1) / 2;
+ unsigned int height_log2 = pixel_log2 - width_log2;
+
+ *width = 1 << width_log2;
+ *height = 1 << height_log2;
+}
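get_block_dimensions() above splits a block of 2^block_log2 bytes into a roughly square tile in pixels, giving the width the extra bit when the pixel count is an odd power of two. A quick user-space check of the same arithmetic:

#include <stdio.h>

static void get_block_dimensions(unsigned int block_log2, unsigned int cpp,
				 unsigned int *width, unsigned int *height)
{
	unsigned int cpp_log2 = __builtin_ctz(cpp);	/* cpp is a power of two */
	unsigned int pixel_log2 = block_log2 - cpp_log2;
	unsigned int width_log2 = (pixel_log2 + 1) / 2;
	unsigned int height_log2 = pixel_log2 - width_log2;

	*width = 1 << width_log2;
	*height = 1 << height_log2;
}

int main(void)
{
	unsigned int w, h;

	get_block_dimensions(16, 4, &w, &h);	/* 64 KiB block, 32bpp */
	printf("64K/32bpp -> %ux%u pixels\n", w, h);	/* 128x128 */

	get_block_dimensions(12, 2, &w, &h);	/* 4 KiB block, 16bpp */
	printf("4K/16bpp  -> %ux%u pixels\n", w, h);	/* 64x32 */
	return 0;
}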
+
+static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
+ bool pipe_aligned)
+{
+ unsigned int ver = AMD_FMT_MOD_GET(TILE_VERSION, modifier);
+
+ switch (ver) {
+ case AMD_FMT_MOD_TILE_VER_GFX9: {
+ /*
+ * TODO: for pipe aligned we may need to check the alignment of the
+ * total size of the surface, which may need to be bigger than the
+ * natural alignment due to some HW workarounds
+ */
+ return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
+ }
+ case AMD_FMT_MOD_TILE_VER_GFX10:
+ case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
+ case AMD_FMT_MOD_TILE_VER_GFX11: {
+ int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
+
+ if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
+ AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
+ ++pipes_log2;
+
+ return max(8 + (pipe_aligned ? pipes_log2 : 0), 12);
+ }
+ default:
+ return 0;
+ }
+}
+
+static int amdgpu_display_verify_plane(struct amdgpu_framebuffer *rfb, int plane,
+ const struct drm_format_info *format,
+ unsigned int block_width, unsigned int block_height,
+ unsigned int block_size_log2)
+{
+ unsigned int width = rfb->base.width /
+ ((plane && plane < format->num_planes) ? format->hsub : 1);
+ unsigned int height = rfb->base.height /
+ ((plane && plane < format->num_planes) ? format->vsub : 1);
+ unsigned int cpp = plane < format->num_planes ? format->cpp[plane] : 1;
+ unsigned int block_pitch = block_width * cpp;
+ unsigned int min_pitch = ALIGN(width * cpp, block_pitch);
+ unsigned int block_size = 1 << block_size_log2;
+ uint64_t size;
+
+ if (rfb->base.pitches[plane] % block_pitch) {
+ drm_dbg_kms(rfb->base.dev,
+ "pitch %d for plane %d is not a multiple of block pitch %d\n",
+ rfb->base.pitches[plane], plane, block_pitch);
+ return -EINVAL;
+ }
+ if (rfb->base.pitches[plane] < min_pitch) {
+ drm_dbg_kms(rfb->base.dev,
+ "pitch %d for plane %d is less than minimum pitch %d\n",
+ rfb->base.pitches[plane], plane, min_pitch);
+ return -EINVAL;
+ }
+
+ /* Force at least natural alignment. */
+ if (rfb->base.offsets[plane] % block_size) {
+ drm_dbg_kms(rfb->base.dev,
+ "offset 0x%x for plane %d is not a multiple of block pitch 0x%x\n",
+ rfb->base.offsets[plane], plane, block_size);
+ return -EINVAL;
+ }
+
+ size = rfb->base.offsets[plane] +
+ (uint64_t)rfb->base.pitches[plane] / block_pitch *
+ block_size * DIV_ROUND_UP(height, block_height);
+
+ if (rfb->base.obj[0]->size < size) {
+ drm_dbg_kms(rfb->base.dev,
+ "BO size 0x%zx is less than 0x%llx required for plane %d\n",
+ rfb->base.obj[0]->size, size, plane);
+ return -EINVAL;
+ }
+
+ return 0;
+}
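The size check in amdgpu_display_verify_plane() walks the plane in block rows: the pitch divided by the block pitch gives blocks per row, each block contributes block_size bytes, and the height is rounded up to whole block rows. A worked stand-alone example for a linear 1920x1080 XRGB8888 plane, using the 256-byte linear blocks assumed by amdgpu_display_verify_sizes():

#include <stdio.h>
#include <stdint.h>

#define ALIGN_UP(x, a)     (((x) + (a) - 1) / (a) * (a))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* Linear layout parameters used for plane 0:
	 * block_width = 256 / cpp, block_height = 1, block_size = 256. */
	unsigned int width = 1920, height = 1080, cpp = 4;
	unsigned int block_width = 256 / cpp, block_height = 1;
	unsigned int block_size = 256;
	unsigned int block_pitch = block_width * cpp;
	unsigned int pitch = ALIGN_UP(width * cpp, block_pitch);
	uint64_t offset = 0;

	uint64_t size = offset +
		(uint64_t)pitch / block_pitch *
		block_size * DIV_ROUND_UP(height, block_height);

	/* Prints: pitch = 7680 bytes, required BO size = 8294400 bytes */
	printf("pitch = %u bytes, required BO size = %llu bytes\n",
	       pitch, (unsigned long long)size);
	return 0;
}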
+
+
+static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
+{
+ const struct drm_format_info *format_info = drm_format_info(rfb->base.format->format);
+ uint64_t modifier = rfb->base.modifier;
+ int ret;
+ unsigned int i, block_width, block_height, block_size_log2;
+
+ if (rfb->base.dev->mode_config.fb_modifiers_not_supported)
+ return 0;
+
+ for (i = 0; i < format_info->num_planes; ++i) {
+ if (modifier == DRM_FORMAT_MOD_LINEAR) {
+ block_width = 256 / format_info->cpp[i];
+ block_height = 1;
+ block_size_log2 = 8;
+ } else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+ switch (swizzle) {
+ case AMD_FMT_MOD_TILE_GFX12_256B_2D:
+ block_size_log2 = 8;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_4K_2D:
+ block_size_log2 = 12;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_64K_2D:
+ block_size_log2 = 16;
+ break;
+ case AMD_FMT_MOD_TILE_GFX12_256K_2D:
+ block_size_log2 = 18;
+ break;
+ default:
+ drm_dbg_kms(rfb->base.dev,
+ "Gfx12 swizzle mode with unknown block size: %d\n", swizzle);
+ return -EINVAL;
+ }
+
+ get_block_dimensions(block_size_log2, format_info->cpp[i],
+ &block_width, &block_height);
+ } else {
+ int swizzle = AMD_FMT_MOD_GET(TILE, modifier);
+
+ switch ((swizzle & ~3) + 1) {
+ case DC_SW_256B_S:
+ block_size_log2 = 8;
+ break;
+ case DC_SW_4KB_S:
+ case DC_SW_4KB_S_X:
+ block_size_log2 = 12;
+ break;
+ case DC_SW_64KB_S:
+ case DC_SW_64KB_S_T:
+ case DC_SW_64KB_S_X:
+ block_size_log2 = 16;
+ break;
+ case DC_SW_VAR_S_X:
+ block_size_log2 = 18;
+ break;
+ default:
+ drm_dbg_kms(rfb->base.dev,
+ "Swizzle mode with unknown block size: %d\n", swizzle);
+ return -EINVAL;
+ }
+
+ get_block_dimensions(block_size_log2, format_info->cpp[i],
+ &block_width, &block_height);
+ }
+
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height, block_size_log2);
+ if (ret)
+ return ret;
+ }
+
+ if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 &&
+ AMD_FMT_MOD_GET(DCC, modifier)) {
+ if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
+ block_size_log2 = get_dcc_block_size(modifier, false, false);
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+ &block_width, &block_height);
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height,
+ block_size_log2);
+ if (ret)
+ return ret;
+
+ ++i;
+ block_size_log2 = get_dcc_block_size(modifier, true, true);
+ } else {
+ bool pipe_aligned = AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
+
+ block_size_log2 = get_dcc_block_size(modifier, true, pipe_aligned);
+ }
+ get_block_dimensions(block_size_log2 + 8, format_info->cpp[0],
+ &block_width, &block_height);
+ ret = amdgpu_display_verify_plane(rfb, i, format_info,
+ block_width, block_height, block_size_log2);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
+ uint64_t *tiling_flags, bool *tmz_surface,
+ bool *gfx12_dcc)
+{
+ struct amdgpu_bo *rbo;
+ int r;
+
+ if (!amdgpu_fb) {
+ *tiling_flags = 0;
+ *tmz_surface = false;
+ *gfx12_dcc = false;
+ return 0;
+ }
+
+ rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]);
+ r = amdgpu_bo_reserve(rbo, false);
+
+ if (unlikely(r)) {
+ /* Don't show error message when returning -ERESTARTSYS */
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Unable to reserve buffer: %d\n", r);
+ return r;
+ }
+
+ amdgpu_bo_get_tiling_flags(rbo, tiling_flags);
+ *tmz_surface = amdgpu_bo_encrypted(rbo);
+ *gfx12_dcc = rbo->flags & AMDGPU_GEM_CREATE_GFX12_DCC;
+
+ amdgpu_bo_unreserve(rbo);
+
+ return r;
+}
+
+static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ struct drm_file *file_priv,
+ const struct drm_format_info *info,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
+{
+ int ret;
+
+ rfb->base.obj[0] = obj;
+ drm_helper_mode_fill_fb_struct(dev, &rfb->base, info, mode_cmd);
+ /* Verify that the modifier is supported. */
+ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
+ mode_cmd->modifier[0])) {
+ drm_dbg_kms(dev,
+ "unsupported pixel format %p4cc / modifier 0x%llx\n",
+ &mode_cmd->pixel_format, mode_cmd->modifier[0]);
+
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
+ if (ret)
+ goto err;
+
+ if (drm_drv_uses_atomic_modeset(dev))
+ ret = drm_framebuffer_init(dev, &rfb->base,
+ &amdgpu_fb_funcs_atomic);
+ else
+ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ drm_dbg_kms(dev, "Failed to verify and init gem fb: %d\n", ret);
+ rfb->base.obj[0] = NULL;
+ return ret;
+}
+
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int ret, i;
+
+ /*
+ * This needs to happen before modifier conversion as that might change
+ * the number of planes.
+ */
+ for (i = 1; i < rfb->base.format->num_planes; ++i) {
+ if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
+ drm_dbg_kms(dev, "Plane 0 and %d have different BOs: %u vs. %u\n",
+ i, mode_cmd->handles[0], mode_cmd->handles[i]);
+ ret = -EINVAL;
+ return ret;
+ }
+ }
+
+ ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface,
+ &rfb->gfx12_dcc);
+ if (ret)
+ return ret;
+
+ if (dev->mode_config.fb_modifiers_not_supported && !adev->enable_virtual_display) {
+ drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI,
+ "GFX9+ requires FB check based on format modifier\n");
+ ret = check_tiling_flags_gfx6(rfb);
+ if (ret)
+ return ret;
+ }
+
+ if (!dev->mode_config.fb_modifiers_not_supported &&
+ !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0))
+ ret = convert_tiling_flags_to_modifier_gfx12(rfb);
+ else
+ ret = convert_tiling_flags_to_modifier(rfb);
+
+ if (ret) {
+ drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier",
+ rfb->tiling_flags);
+ return ret;
+ }
+ }
+
+ ret = amdgpu_display_verify_sizes(rfb);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < rfb->base.format->num_planes; ++i) {
+ drm_gem_object_get(rfb->base.obj[0]);
+ rfb->base.obj[i] = rfb->base.obj[0];
+ }
+
+ return 0;
+}
+
+struct drm_framebuffer *
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ const struct drm_format_info *info,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct amdgpu_framebuffer *amdgpu_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *bo;
+ uint32_t domains;
+ int ret;
+
+ obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]);
+ if (obj == NULL) {
+ drm_dbg_kms(dev,
+ "No GEM object associated to handle 0x%08X, can't create framebuffer\n",
+ mode_cmd->handles[0]);
+
+ return ERR_PTR(-ENOENT);
+ }
+
+ /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
+ bo = gem_to_amdgpu_bo(obj);
+ domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags);
+ if (drm_gem_is_imported(obj) && !(domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n");
+ drm_gem_object_put(obj);
+ return ERR_PTR(-EINVAL);
+ }
+
+ amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL);
+ if (amdgpu_fb == NULL) {
+ drm_gem_object_put(obj);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = amdgpu_display_gem_fb_verify_and_init(dev, amdgpu_fb, file_priv,
+ info, mode_cmd, obj);
+ if (ret) {
+ kfree(amdgpu_fb);
+ drm_gem_object_put(obj);
+ return ERR_PTR(ret);
+ }
+
+ drm_gem_object_put(obj);
+ return &amdgpu_fb->base;
+}
+
+const struct drm_mode_config_funcs amdgpu_mode_funcs = {
+ .fb_create = amdgpu_display_user_framebuffer_create,
+};
+
+static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = {
+ { UNDERSCAN_OFF, "off" },
+ { UNDERSCAN_ON, "on" },
+ { UNDERSCAN_AUTO, "auto" },
+};
+
+static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = {
+ { AMDGPU_AUDIO_DISABLE, "off" },
+ { AMDGPU_AUDIO_ENABLE, "on" },
+ { AMDGPU_AUDIO_AUTO, "auto" },
+};
+
+/* XXX support different dither options? spatial, temporal, both, etc. */
+static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = {
+ { AMDGPU_FMT_DITHER_DISABLE, "off" },
+ { AMDGPU_FMT_DITHER_ENABLE, "on" },
+};
+
+/**
+ * DOC: property for adaptive backlight modulation
+ *
+ * The 'adaptive backlight modulation' property lets the compositor directly
+ * control the adaptive backlight modulation power savings feature
+ * that is part of DCN hardware.
+ *
+ * The property will be attached specifically to eDP panels that support it.
+ *
+ * The property is set to 'sysfs' by default so that the sysfs file
+ * 'panel_power_savings' can control it.
+ * If set to 'off' the compositor will ensure it stays off.
+ * The other values 'min', 'bias min', 'bias max', and 'max' will control the
+ * intensity of the power savings.
+ *
+ * Modifying this value can affect color accuracy, so tread carefully.
+ */
+static int amdgpu_display_setup_abm_prop(struct amdgpu_device *adev)
+{
+ const struct drm_prop_enum_list props[] = {
+ { ABM_SYSFS_CONTROL, "sysfs" },
+ { ABM_LEVEL_OFF, "off" },
+ { ABM_LEVEL_MIN, "min" },
+ { ABM_LEVEL_BIAS_MIN, "bias min" },
+ { ABM_LEVEL_BIAS_MAX, "bias max" },
+ { ABM_LEVEL_MAX, "max" },
+ };
+ struct drm_property *prop;
+ int i;
+
+ if (!adev->dc_enabled)
+ return 0;
+
+ prop = drm_property_create(adev_to_drm(adev), DRM_MODE_PROP_ENUM,
+ "adaptive backlight modulation",
+ 6);
+ if (!prop)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(props); i++) {
+ int ret;
+
+ ret = drm_property_add_enum(prop, props[i].type,
+ props[i].name);
+
+ if (ret) {
+ drm_property_destroy(adev_to_drm(adev), prop);
+
+ return ret;
+ }
+ }
+
+ adev->mode_info.abm_level_property = prop;
+
+ return 0;
+}
+
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
+{
+ int sz;
+
+ adev->mode_info.coherent_mode_property =
+ drm_property_create_range(adev_to_drm(adev), 0, "coherent", 0, 1);
+ if (!adev->mode_info.coherent_mode_property)
+ return -ENOMEM;
+
+ adev->mode_info.load_detect_property =
+ drm_property_create_range(adev_to_drm(adev), 0, "load detection", 0, 1);
+ if (!adev->mode_info.load_detect_property)
+ return -ENOMEM;
+
+ drm_mode_create_scaling_mode_property(adev_to_drm(adev));
+
+ sz = ARRAY_SIZE(amdgpu_underscan_enum_list);
+ adev->mode_info.underscan_property =
+ drm_property_create_enum(adev_to_drm(adev), 0,
+ "underscan",
+ amdgpu_underscan_enum_list, sz);
+
+ adev->mode_info.underscan_hborder_property =
+ drm_property_create_range(adev_to_drm(adev), 0,
+ "underscan hborder", 0, 128);
+ if (!adev->mode_info.underscan_hborder_property)
+ return -ENOMEM;
+
+ adev->mode_info.underscan_vborder_property =
+ drm_property_create_range(adev_to_drm(adev), 0,
+ "underscan vborder", 0, 128);
+ if (!adev->mode_info.underscan_vborder_property)
+ return -ENOMEM;
+
+ sz = ARRAY_SIZE(amdgpu_audio_enum_list);
+ adev->mode_info.audio_property =
+ drm_property_create_enum(adev_to_drm(adev), 0,
+ "audio",
+ amdgpu_audio_enum_list, sz);
+
+ sz = ARRAY_SIZE(amdgpu_dither_enum_list);
+ adev->mode_info.dither_property =
+ drm_property_create_enum(adev_to_drm(adev), 0,
+ "dither",
+ amdgpu_dither_enum_list, sz);
+
+ return amdgpu_display_setup_abm_prop(adev);
+}
+
+void amdgpu_display_update_priority(struct amdgpu_device *adev)
+{
+ /* adjustment options for the display watermarks */
+ if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2))
+ adev->mode_info.disp_priority = 0;
+ else
+ adev->mode_info.disp_priority = amdgpu_disp_priority;
+}
+
+static bool amdgpu_display_is_hdtv_mode(const struct drm_display_mode *mode)
+{
+ /* try and guess if this is a tv or a monitor */
+ if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */
+ (mode->vdisplay == 576) || /* 576p */
+ (mode->vdisplay == 720) || /* 720p */
+ (mode->vdisplay == 1080)) /* 1080p */
+ return true;
+ else
+ return false;
+}
+
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_encoder *amdgpu_encoder;
+ struct drm_connector *connector;
+ u32 src_v = 1, dst_v = 1;
+ u32 src_h = 1, dst_h = 1;
+
+ amdgpu_crtc->h_border = 0;
+ amdgpu_crtc->v_border = 0;
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc != crtc)
+ continue;
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ connector = amdgpu_get_connector_for_encoder(encoder);
+
+ /* set scaling */
+ if (amdgpu_encoder->rmx_type == RMX_OFF)
+ amdgpu_crtc->rmx_type = RMX_OFF;
+ else if (mode->hdisplay < amdgpu_encoder->native_mode.hdisplay ||
+ mode->vdisplay < amdgpu_encoder->native_mode.vdisplay)
+ amdgpu_crtc->rmx_type = amdgpu_encoder->rmx_type;
+ else
+ amdgpu_crtc->rmx_type = RMX_OFF;
+ /* copy native mode */
+ memcpy(&amdgpu_crtc->native_mode,
+ &amdgpu_encoder->native_mode,
+ sizeof(struct drm_display_mode));
+ src_v = crtc->mode.vdisplay;
+ dst_v = amdgpu_crtc->native_mode.vdisplay;
+ src_h = crtc->mode.hdisplay;
+ dst_h = amdgpu_crtc->native_mode.hdisplay;
+
+ /* fix up for overscan on hdmi */
+ if ((!(mode->flags & DRM_MODE_FLAG_INTERLACE)) &&
+ ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
+ ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
+ connector && connector->display_info.is_hdmi &&
+ amdgpu_display_is_hdtv_mode(mode)))) {
+ if (amdgpu_encoder->underscan_hborder != 0)
+ amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
+ else
+ amdgpu_crtc->h_border = (mode->hdisplay >> 5) + 16;
+ if (amdgpu_encoder->underscan_vborder != 0)
+ amdgpu_crtc->v_border = amdgpu_encoder->underscan_vborder;
+ else
+ amdgpu_crtc->v_border = (mode->vdisplay >> 5) + 16;
+ amdgpu_crtc->rmx_type = RMX_FULL;
+ src_v = crtc->mode.vdisplay;
+ dst_v = crtc->mode.vdisplay - (amdgpu_crtc->v_border * 2);
+ src_h = crtc->mode.hdisplay;
+ dst_h = crtc->mode.hdisplay - (amdgpu_crtc->h_border * 2);
+ }
+ }
+ if (amdgpu_crtc->rmx_type != RMX_OFF) {
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(src_v);
+ b.full = dfixed_const(dst_v);
+ amdgpu_crtc->vsc.full = dfixed_div(a, b);
+ a.full = dfixed_const(src_h);
+ b.full = dfixed_const(dst_h);
+ amdgpu_crtc->hsc.full = dfixed_div(a, b);
+ } else {
+ amdgpu_crtc->vsc.full = dfixed_const(1);
+ amdgpu_crtc->hsc.full = dfixed_const(1);
+ }
+ return true;
+}
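+
+/*
+ * Illustrative example of the 20.12 fixed-point scale factors computed above:
+ * scaling a requested 1280x720 mode up to a 1920x1080 native panel mode gives
+ * vsc = 720/1080 and hsc = 1280/1920, both roughly 0.667, i.e. about 0xAAB in
+ * fixed20_12 representation. The HDMI underscan path feeds the same math with
+ * default borders of (hdisplay >> 5) + 16 and (vdisplay >> 5) + 16 when no
+ * explicit border is configured.
+ */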
+
+/*
+ * Retrieve current video scanout position of crtc on a given gpu, and
+ * an optional accurate timestamp of when query happened.
+ *
+ * \param dev Device to query.
+ * \param pipe Crtc to query.
+ * \param flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ * For driver internal use only also supports these flags:
+ *
+ * USE_REAL_VBLANKSTART to use the real start of vblank instead
+ * of a fudged earlier start of vblank.
+ *
+ * GET_DISTANCE_TO_VBLANKSTART to return distance to the
+ * fudged earlier start of vblank in *vpos and the distance
+ * to true start of vblank in *hpos.
+ *
+ * \param *vpos Location where vertical scanout position should be stored.
+ * \param *hpos Location where horizontal scanout position should go.
+ * \param *stime Target location for timestamp taken immediately before
+ * scanout position query. Can be NULL to skip timestamp.
+ * \param *etime Target location for timestamp taken immediately after
+ * scanout position query. Can be NULL to skip timestamp.
+ *
+ * Returns vpos as a positive number while in active scanout area.
+ * Returns vpos as a negative number inside vblank, counting the number
+ * of scanlines to go until end of vblank, e.g., -1 means "one scanline
+ * until start of active scanout / end of vblank."
+ *
+ * \return Flags, or'ed together as follows:
+ *
+ * DRM_SCANOUTPOS_VALID = Query successful.
+ * DRM_SCANOUTPOS_IN_VBLANK = Inside vblank.
+ * DRM_SCANOUTPOS_ACCURATE = Returned position is accurate. A lack of
+ * this flag means that returned position may be offset by a constant but
+ * unknown small number of scanlines wrt. real scanout position.
+ *
+ */
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+ unsigned int pipe, unsigned int flags, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode)
+{
+ u32 vbl = 0, position = 0;
+ int vbl_start, vbl_end, vtotal, ret = 0;
+ bool in_vbl = true;
+
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+
+ /* Get optional system timestamp before query. */
+ if (stime)
+ *stime = ktime_get();
+
+ if (amdgpu_display_page_flip_get_scanoutpos(adev, pipe, &vbl, &position) == 0)
+ ret |= DRM_SCANOUTPOS_VALID;
+
+ /* Get optional system timestamp after query. */
+ if (etime)
+ *etime = ktime_get();
+
+ /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+
+ /* Decode into vertical and horizontal scanout position. */
+ *vpos = position & 0x1fff;
+ *hpos = (position >> 16) & 0x1fff;
+
+ /* Valid vblank area boundaries from gpu retrieved? */
+ if (vbl > 0) {
+ /* Yes: Decode. */
+ ret |= DRM_SCANOUTPOS_ACCURATE;
+ vbl_start = vbl & 0x1fff;
+ vbl_end = (vbl >> 16) & 0x1fff;
+ } else {
+ /* No: Fake something reasonable which gives at least ok results. */
+ vbl_start = mode->crtc_vdisplay;
+ vbl_end = 0;
+ }
+
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from real vbl_start in *hpos */
+ *hpos = *vpos - vbl_start;
+ }
+
+ /* Fudge vblank to start a few scanlines earlier to handle the
+ * problem that vblank irqs fire a few scanlines before start
+ * of vblank. Some driver internal callers need the true vblank
+ * start to be used and signal this via the USE_REAL_VBLANKSTART flag.
+ *
+ * The cause of the "early" vblank irq is that the irq is triggered
+ * by the line buffer logic when the line buffer read position enters
+ * the vblank, whereas our crtc scanout position naturally lags the
+ * line buffer read position.
+ */
+ if (!(flags & USE_REAL_VBLANKSTART))
+ vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines;
+
+ /* Test scanout position against vblank region. */
+ if ((*vpos < vbl_start) && (*vpos >= vbl_end))
+ in_vbl = false;
+
+ /* In vblank? */
+ if (in_vbl)
+ ret |= DRM_SCANOUTPOS_IN_VBLANK;
+
+ /* Called from driver internal vblank counter query code? */
+ if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+ /* Caller wants distance from fudged earlier vbl_start */
+ *vpos -= vbl_start;
+ return ret;
+ }
+
+ /* Check if inside vblank area and apply corrective offsets:
+ * vpos will then be >=0 in video scanout area, but negative
+ * within vblank area, counting down the number of lines until
+ * start of scanout.
+ */
+
+ /* Inside "upper part" of vblank area? Apply corrective offset if so: */
+ if (in_vbl && (*vpos >= vbl_start)) {
+ vtotal = mode->crtc_vtotal;
+
+ /* With variable refresh rate displays the vpos can exceed
+ * the vtotal value. Clamp to 0 to return -vbl_end instead
+ * of guessing the remaining number of lines until scanout.
+ */
+ *vpos = (*vpos < vtotal) ? (*vpos - vtotal) : 0;
+ }
+
+ /* Correct for shifted end of vbl at vbl_end. */
+ *vpos = *vpos - vbl_end;
+
+ return ret;
+}
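+
+/*
+ * Illustrative decode of the packed registers used above: the position word
+ * carries vpos in bits 0..12 and hpos in bits 16..28, so a raw value of
+ * 0x00500123 decodes to hpos = 0x50 (80) and vpos = 0x123 (291). The vbl word
+ * packs vbl_start in the low 13 bits and vbl_end in bits 16..28 the same way.
+ */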
+
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
+ return AMDGPU_CRTC_IRQ_NONE;
+
+ switch (crtc) {
+ case 0:
+ return AMDGPU_CRTC_IRQ_VBLANK1;
+ case 1:
+ return AMDGPU_CRTC_IRQ_VBLANK2;
+ case 2:
+ return AMDGPU_CRTC_IRQ_VBLANK3;
+ case 3:
+ return AMDGPU_CRTC_IRQ_VBLANK4;
+ case 4:
+ return AMDGPU_CRTC_IRQ_VBLANK5;
+ case 5:
+ return AMDGPU_CRTC_IRQ_VBLANK6;
+ default:
+ return AMDGPU_CRTC_IRQ_NONE;
+ }
+}
+
+bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
+ bool in_vblank_irq, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+
+ return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
+ stime, etime, mode);
+}
+
+static bool
+amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_fb_helper *fb_helper = dev->fb_helper;
+
+ if (!fb_helper || !fb_helper->buffer)
+ return false;
+
+ if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
+ return false;
+
+ return true;
+}
+
+int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_crtc *crtc;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ int r;
+
+ drm_kms_helper_poll_disable(dev);
+
+ /* turn off display hw */
+ drm_modeset_lock_all(dev);
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_OFF);
+ drm_connector_list_iter_end(&iter);
+ drm_modeset_unlock_all(dev);
+ /* unpin the front buffers and cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_framebuffer *fb = crtc->primary->fb;
+ struct amdgpu_bo *robj;
+
+ if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+
+ r = amdgpu_bo_reserve(aobj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ }
+
+ if (!fb || !fb->obj[0])
+ continue;
+
+ robj = gem_to_amdgpu_bo(fb->obj[0]);
+ if (!amdgpu_display_robj_is_fb(adev, robj)) {
+ r = amdgpu_bo_reserve(robj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(robj);
+ amdgpu_bo_unreserve(robj);
+ }
+ }
+ }
+ return 0;
+}
+
+int amdgpu_display_resume_helper(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct drm_crtc *crtc;
+ int r;
+
+ /* pin cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+
+ r = amdgpu_bo_reserve(aobj, true);
+ if (r == 0) {
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r != 0)
+ dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ }
+ }
+
+ drm_helper_resume_force_mode(dev);
+
+ /* turn on display hw */
+ drm_modeset_lock_all(dev);
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter)
+ drm_helper_connector_dpms(connector,
+ DRM_MODE_DPMS_ON);
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock_all(dev);
+
+ drm_kms_helper_poll_enable(dev);
+
+ return 0;
+}
+
+/* panic_abo is set in amdgpu_display_get_scanout_buffer() and only used in
+ * amdgpu_display_set_pixel(). Both are called from the panic handler and are
+ * protected by the drm_panic spinlock.
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use indirect MMIO to write each pixel to GPU VRAM.
+ * This is a simplified version of amdgpu_device_mm_access().
+ */
+static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb,
+ unsigned int x,
+ unsigned int y,
+ u32 color)
+{
+ struct amdgpu_res_cursor cursor;
+ unsigned long offset;
+ struct amdgpu_bo *abo = panic_abo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ uint32_t tmp;
+
+ offset = x * 4 + y * sb->pitch[0];
+ amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+ tmp = cursor.start >> 31;
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+ if (tmp != 0xffffffff)
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ WREG32_NO_KIQ(mmMM_DATA, color);
+}
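+
+/*
+ * Illustrative example of the indirect MMIO path above: for a pixel at
+ * (x = 10, y = 2) on a framebuffer with pitch[0] = 7680 (1920 pixels * 4
+ * bytes), the byte offset into the BO is 10 * 4 + 2 * 7680 = 15400. The
+ * resulting VRAM offset is then split between MM_INDEX (low bits, with bit 31
+ * used as the extended-aperture flag) and MM_INDEX_HI (offset >> 31) before
+ * the pixel value is written through MM_DATA.
+ */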
+
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb)
+{
+ struct amdgpu_bo *abo;
+ struct drm_framebuffer *fb = plane->state->fb;
+
+ if (!fb)
+ return -EINVAL;
+
+ DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ if (!abo)
+ return -EINVAL;
+
+ sb->width = fb->width;
+ sb->height = fb->height;
+ /* Use the generic linear format, because tiling will be disabled in panic_flush() */
+ sb->format = drm_format_info(fb->format->format);
+ if (!sb->format)
+ return -EINVAL;
+
+ sb->pitch[0] = fb->pitches[0];
+
+ if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+ if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+ drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+ return -EINVAL;
+ }
+ /* Only handle 32-bit formats, to simplify MMIO access */
+ if (fb->format->cpp[0] != 4) {
+ drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+ return -EINVAL;
+ }
+ sb->set_pixel = amdgpu_display_set_pixel;
+ panic_abo = abo;
+ return 0;
+ }
+ if (!abo->kmap.virtual &&
+ ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+ drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+ return -ENOMEM;
+ }
+ if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+ iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+ else
+ iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
new file mode 100644
index 000000000000..49a29bf47a37
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_DISPLAY_H__
+#define __AMDGPU_DISPLAY_H__
+
+#include <drm/drm_panic.h>
+
+#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
+#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
+#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
+#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
+#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
+#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
+#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
+#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
+#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
+#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
+#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
+
+void amdgpu_display_hotplug_work_func(struct work_struct *work);
+void amdgpu_display_update_priority(struct amdgpu_device *adev);
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
+ uint64_t bo_flags);
+struct drm_framebuffer *
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ const struct drm_format_info *info,
+ const struct drm_mode_fb_cmd2 *mode_cmd);
+const struct drm_format_info *
+amdgpu_lookup_format_info(u32 format, uint64_t modifier);
+
+int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
+int amdgpu_display_resume_helper(struct amdgpu_device *adev);
+
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb);
+
+#define ABM_SYSFS_CONTROL -1
+#define ABM_LEVEL_OFF 0
+#define ABM_LEVEL_MIN 1
+#define ABM_LEVEL_BIAS_MIN 2
+#define ABM_LEVEL_BIAS_MAX 3
+#define ABM_LEVEL_MAX 4
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
new file mode 100644
index 000000000000..e22cfa7c6d32
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -0,0 +1,605 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * based on nouveau_prime.c
+ *
+ * Authors: Alex Deucher
+ */
+
+/**
+ * DOC: PRIME Buffer Sharing
+ *
+ * The following callback implementations are used for :ref:`sharing GEM buffer
+ * objects between different devices via PRIME <prime_buffer_sharing>`.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_display.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
+#include <drm/amdgpu_drm.h>
+#include <drm/ttm/ttm_tt.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-fence-array.h>
+#include <linux/pci-p2pdma.h>
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops;
+
+/**
+ * dma_buf_attach_adev - Helper to get adev of an attachment
+ *
+ * @attach: attachment
+ *
+ * Returns:
+ * A struct amdgpu_device * if the attaching device is an amdgpu device or
+ * partition, NULL otherwise.
+ */
+static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach)
+{
+ if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) {
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ return amdgpu_ttm_adev(bo->tbo.bdev);
+ }
+
+ return NULL;
+}
+
+/**
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
+ *
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
+ *
+ * Add the attachment as user to the exported DMA-buf.
+ */
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach);
+ struct drm_gem_object *obj = dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ int r;
+
+ /*
+ * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
+ * Such buffers cannot be safely accessed over P2P due to device-local
+ * compression metadata, so fall back to the system-memory path instead.
+ * This applies when the device is GFX12 (GC 12.x or newer) and the BO
+ * was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
+ bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+ attach->peer2peer = false;
+
+ if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) &&
+ pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
+ attach->peer2peer = false;
+
+ r = dma_resv_lock(bo->tbo.base.resv, NULL);
+ if (r)
+ return r;
+
+ amdgpu_vm_bo_update_shared(bo);
+
+ dma_resv_unlock(bo->tbo.base.resv);
+
+ return 0;
+}
+
+/**
+ * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
+ *
+ * @attach: attachment to pin down
+ *
+ * Pin the BO which is backing the DMA-buf so that it can't move any more.
+ */
+static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+ struct dma_buf *dmabuf = attach->dmabuf;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv);
+ u32 domains = bo->allowed_domains;
+
+ dma_resv_assert_held(dmabuf->resv);
+
+ /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP
+ * support if all attachments can do P2P. If any attachment can't do
+ * P2P just pin into GTT instead.
+ *
+ * To avoid conflicting pinnings between GPUs and RDMA when move
+ * notifiers are disabled, only allow pinning in VRAM when move
+ * notifiers are enabled.
+ */
+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ } else {
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ if (!attach->peer2peer)
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ }
+
+ if (domains & AMDGPU_GEM_DOMAIN_VRAM)
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ if (WARN_ON(!domains))
+ return -EINVAL;
+
+ return amdgpu_bo_pin(bo, domains);
+}
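+
+/*
+ * Illustrative outcome of the domain selection above: with
+ * CONFIG_DMABUF_MOVE_NOTIFY disabled, VRAM is always masked out and the BO is
+ * pinned to GTT; with move notifiers enabled and every attachment reporting
+ * peer2peer support, a VRAM-capable BO can stay pinned in VRAM so that
+ * P2P/RDMA importers reach it directly.
+ */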
+
+/**
+ * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
+ *
+ * @attach: attachment to unpin
+ *
+ * Unpin a previously pinned BO to make it movable again.
+ */
+static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ amdgpu_bo_unpin(bo);
+}
+
+/**
+ * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation
+ * @attach: DMA-buf attachment
+ * @dir: DMA direction
+ *
+ * Makes sure that the shared DMA buffer can be accessed by the target device.
+ * For now, simply pins it to the GTT domain, where it should be accessible by
+ * all DMA devices.
+ *
+ * Returns:
+ * sg_table filled with the DMA addresses to use or ERR_PTR with a negative error
+ * code.
+ */
+static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
+{
+ struct dma_buf *dma_buf = attach->dmabuf;
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct sg_table *sgt;
+ long r;
+
+ if (!bo->tbo.pin_count) {
+ /* move buffer into GTT or VRAM */
+ struct ttm_operation_ctx ctx = { false, false };
+ unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
+ attach->peer2peer) {
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ domains |= AMDGPU_GEM_DOMAIN_VRAM;
+ }
+ amdgpu_bo_placement_from_domain(bo, domains);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ return ERR_PTR(r);
+ }
+
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_TT:
+ sgt = drm_prime_pages_to_sg(obj->dev,
+ bo->tbo.ttm->pages,
+ bo->tbo.ttm->num_pages);
+ if (IS_ERR(sgt))
+ return sgt;
+
+ if (dma_map_sgtable(attach->dev, sgt, dir,
+ DMA_ATTR_SKIP_CPU_SYNC))
+ goto error_free;
+ break;
+
+ case TTM_PL_VRAM:
+ /* XGMI-accessible memory should never be DMA-mapped */
+ if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible(
+ dma_buf_attach_adev(attach), bo)))
+ return ERR_PTR(-EINVAL);
+
+ r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
+ bo->tbo.base.size, attach->dev,
+ dir, &sgt);
+ if (r)
+ return ERR_PTR(r);
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ return sgt;
+
+error_free:
+ sg_free_table(sgt);
+ kfree(sgt);
+ return ERR_PTR(-EBUSY);
+}
+
+/**
+ * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation
+ * @attach: DMA-buf attachment
+ * @sgt: sg_table to unmap
+ * @dir: DMA direction
+ *
+ * This is called when a shared DMA buffer no longer needs to be accessible by
+ * another device. For now, simply unpins the buffer from GTT.
+ */
+static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
+{
+ if (sg_page(sgt->sgl)) {
+ dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+ sg_free_table(sgt);
+ kfree(sgt);
+ } else {
+ amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
+ }
+}
+
+/**
+ * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
+ * @dma_buf: Shared DMA buffer
+ * @direction: Direction of DMA transfer
+ *
+ * This is called before CPU access to the shared DMA buffer's memory. If it's
+ * a read access, the buffer is moved to the GTT domain if possible, for optimal
+ * CPU read performance.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_operation_ctx ctx = { true, false };
+ u32 domain = amdgpu_display_supported_domains(adev, bo->flags);
+ int ret;
+ bool reads = (direction == DMA_BIDIRECTIONAL ||
+ direction == DMA_FROM_DEVICE);
+
+ if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
+ return 0;
+
+ /* move to gtt */
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret != 0))
+ return ret;
+
+ if (!bo->tbo.pin_count &&
+ (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ }
+
+ amdgpu_bo_unreserve(bo);
+ return ret;
+}
+
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ int ret;
+
+ /*
+ * Pin to keep buffer in place while it's vmap'ed. The actual
+ * domain is not that important as long as it's mappable. Using
+ * GTT and VRAM should be compatible with most use cases.
+ */
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM);
+ if (ret)
+ return ret;
+ ret = drm_gem_dmabuf_vmap(dma_buf, map);
+ if (ret)
+ amdgpu_bo_unpin(bo);
+
+ return ret;
+}
+
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ drm_gem_dmabuf_vunmap(dma_buf, map);
+ amdgpu_bo_unpin(bo);
+}
+
+const struct dma_buf_ops amdgpu_dmabuf_ops = {
+ .attach = amdgpu_dma_buf_attach,
+ .pin = amdgpu_dma_buf_pin,
+ .unpin = amdgpu_dma_buf_unpin,
+ .map_dma_buf = amdgpu_dma_buf_map,
+ .unmap_dma_buf = amdgpu_dma_buf_unmap,
+ .release = drm_gem_dmabuf_release,
+ .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
+ .mmap = drm_gem_dmabuf_mmap,
+ .vmap = amdgpu_dma_buf_vmap,
+ .vunmap = amdgpu_dma_buf_vunmap,
+};
+
+/**
+ * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
+ * @gobj: GEM BO
+ * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
+ *
+ * The main work is done by the &drm_gem_prime_export helper.
+ *
+ * Returns:
+ * Shared DMA buffer representing the GEM BO from the given device.
+ */
+struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
+ int flags)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct dma_buf *buf;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ /* We opt to avoid OOM on system page allocations */
+ .gfp_retry_mayfail = true,
+ .allow_res_evict = false,
+ };
+ int ret;
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
+ bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+ return ERR_PTR(-EPERM);
+
+ ret = ttm_bo_setup_export(&bo->tbo, &ctx);
+ if (ret)
+ return ERR_PTR(ret);
+
+ buf = drm_gem_prime_export(gobj, flags);
+ if (!IS_ERR(buf))
+ buf->ops = &amdgpu_dmabuf_ops;
+
+ return buf;
+}
+
+/**
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
+ * @dev: DRM device
+ * @dma_buf: DMA-buf
+ *
+ * Creates an empty SG BO for DMA-buf import.
+ *
+ * Returns:
+ * A new GEM BO of the given DRM device, representing the memory
+ * described by the given DMA-buf attachment and scatter/gather table.
+ */
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+ struct dma_resv *resv = dma_buf->resv;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
+ uint64_t flags = 0;
+ int ret;
+
+ dma_resv_lock(resv, NULL);
+
+ if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+ struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);
+
+ flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
+ AMDGPU_GEM_CREATE_UNCACHED);
+ }
+
+ ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_CPU, flags,
+ ttm_bo_type_sg, resv, &gobj, 0);
+ if (ret)
+ goto error;
+
+ bo = gem_to_amdgpu_bo(gobj);
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ dma_resv_unlock(resv);
+ return gobj;
+
+error:
+ dma_resv_unlock(resv);
+ return ERR_PTR(ret);
+}
+
+/**
+ * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
+{
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct ttm_placement placement = {};
+ struct amdgpu_vm_bo_base *bo_base;
+ int r;
+
+ /* FIXME: This should be after the "if", but needs a fix to make sure
+ * DMABuf imports are initialized in the right VM list.
+ */
+ amdgpu_vm_bo_invalidate(bo, false);
+ if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
+ return;
+
+ r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
+ return;
+ }
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+ struct dma_resv *resv = vm->root.bo->tbo.base.resv;
+
+ if (ticket) {
+ /* When we get an error here it means that somebody
+ * else is holding the VM lock and updating page tables,
+ * so we can just continue here.
+ */
+ r = dma_resv_lock(resv, ticket);
+ if (r)
+ continue;
+
+ } else {
+ /* TODO: This is more problematic and we actually need
+ * to allow page tables updates without holding the
+ * lock.
+ */
+ if (!dma_resv_trylock(resv))
+ continue;
+ }
+
+ /* Reserve fences for two SDMA page table updates */
+ r = dma_resv_reserve_fences(resv, 2);
+ if (!r)
+ r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ if (!r)
+ r = amdgpu_vm_handle_moved(adev, vm, ticket);
+
+ if (r && r != -EBUSY)
+ DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
+ r);
+
+ dma_resv_unlock(resv);
+ }
+}
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
+ .allow_peer2peer = true,
+ .move_notify = amdgpu_dma_buf_move_notify
+};
+
+/**
+ * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
+ * @dev: DRM device
+ * @dma_buf: Shared DMA buffer
+ *
+ * Import a dma_buf into the driver and potentially create a new GEM object.
+ *
+ * Returns:
+ * GEM BO representing the shared DMA buffer for the given device.
+ */
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ struct dma_buf_attachment *attach;
+ struct drm_gem_object *obj;
+
+ if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+ obj = dma_buf->priv;
+ if (obj->dev == dev) {
+ /*
+ * Importing a dmabuf exported from our own GEM object increases
+ * the refcount on the GEM object itself instead of the f_count
+ * of the dmabuf.
+ */
+ drm_gem_object_get(obj);
+ return obj;
+ }
+ }
+
+ obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+ if (IS_ERR(obj))
+ return obj;
+
+ attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
+ &amdgpu_dma_buf_attach_ops, obj);
+ if (IS_ERR(attach)) {
+ drm_gem_object_put(obj);
+ return ERR_CAST(attach);
+ }
+
+ get_dma_buf(dma_buf);
+ obj->import_attach = attach;
+ return obj;
+}
+
+/**
+ * amdgpu_dmabuf_is_xgmi_accessible - Check if xgmi available for P2P transfer
+ *
+ * @adev: amdgpu_device pointer of the importer
+ * @bo: amdgpu buffer object
+ *
+ * Returns:
+ * True if dmabuf accessible over xgmi, false otherwise.
+ */
+bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo)
+{
+ struct drm_gem_object *obj = &bo->tbo.base;
+ struct drm_gem_object *gobj;
+
+ if (!adev)
+ return false;
+
+ if (drm_gem_is_imported(obj)) {
+ struct dma_buf *dma_buf = obj->import_attach->dmabuf;
+
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
+ /* No XGMI with non AMD GPUs */
+ return false;
+
+ gobj = dma_buf->priv;
+ bo = gem_to_amdgpu_bo(gobj);
+ }
+
+ if (amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) &&
+ (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM))
+ return true;
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
new file mode 100644
index 000000000000..3e93b9b407a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_DMA_BUF_H__
+#define __AMDGPU_DMA_BUF_H__
+
+#include <drm/drm_gem.h>
+
+struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
+ int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo);
+
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
new file mode 100644
index 000000000000..2675689ef70f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -0,0 +1,372 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_DOORBELL_H
+#define AMDGPU_DOORBELL_H
+
+/*
+ * GPU doorbell structures, functions & helpers
+ */
+struct amdgpu_doorbell {
+ /* doorbell mmio */
+ resource_size_t base;
+ resource_size_t size;
+
+ /* Number of doorbells reserved for amdgpu kernel driver */
+ u32 num_kernel_doorbells;
+
+ /* Kernel doorbells */
+ struct amdgpu_bo *kernel_doorbells;
+
+ /* For CPU access of doorbells */
+ uint32_t *cpu_addr;
+};
+
+/* Reserved doorbells for amdgpu (including multimedia).
+ * KFD can use all the rest in the 2M doorbell BAR.
+ * For ASICs before vega10, doorbells are 32-bit, so the
+ * index/offset is in dwords. For vega10 and later, doorbells
+ * can be 64-bit, so the index is defined in qwords.
+ */
+struct amdgpu_doorbell_index {
+ uint32_t kiq;
+ uint32_t mec_ring0;
+ uint32_t mec_ring1;
+ uint32_t mec_ring2;
+ uint32_t mec_ring3;
+ uint32_t mec_ring4;
+ uint32_t mec_ring5;
+ uint32_t mec_ring6;
+ uint32_t mec_ring7;
+ uint32_t userqueue_start;
+ uint32_t userqueue_end;
+ uint32_t gfx_ring0;
+ uint32_t gfx_ring1;
+ uint32_t gfx_userqueue_start;
+ uint32_t gfx_userqueue_end;
+ uint32_t sdma_engine[16];
+ uint32_t mes_ring0;
+ uint32_t mes_ring1;
+ uint32_t ih;
+ union {
+ struct {
+ uint32_t vcn_ring0_1;
+ uint32_t vcn_ring2_3;
+ uint32_t vcn_ring4_5;
+ uint32_t vcn_ring6_7;
+ } vcn;
+ struct {
+ uint32_t uvd_ring0_1;
+ uint32_t uvd_ring2_3;
+ uint32_t uvd_ring4_5;
+ uint32_t uvd_ring6_7;
+ uint32_t vce_ring0_1;
+ uint32_t vce_ring2_3;
+ uint32_t vce_ring4_5;
+ uint32_t vce_ring6_7;
+ } uvd_vce;
+ };
+ uint32_t vpe_ring;
+ uint32_t first_non_cp;
+ uint32_t last_non_cp;
+ uint32_t max_assignment;
+ /* Per engine SDMA doorbell size in dword */
+ uint32_t sdma_doorbell_range;
+ /* Per xcc doorbell size for KIQ/KCQ */
+ uint32_t xcc_doorbell_range;
+};
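+
+/*
+ * Illustrative example of the index units described above: on a pre-vega10
+ * ASIC a doorbell index of 0x10 is a dword index, i.e. byte offset 0x40 into
+ * the doorbell BAR, while on vega10 and later the same index is a qword
+ * index, i.e. byte offset 0x80.
+ */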
+
+enum AMDGPU_DOORBELL_ASSIGNMENT {
+ AMDGPU_DOORBELL_KIQ = 0x000,
+ AMDGPU_DOORBELL_HIQ = 0x001,
+ AMDGPU_DOORBELL_DIQ = 0x002,
+ AMDGPU_DOORBELL_MEC_RING0 = 0x010,
+ AMDGPU_DOORBELL_MEC_RING1 = 0x011,
+ AMDGPU_DOORBELL_MEC_RING2 = 0x012,
+ AMDGPU_DOORBELL_MEC_RING3 = 0x013,
+ AMDGPU_DOORBELL_MEC_RING4 = 0x014,
+ AMDGPU_DOORBELL_MEC_RING5 = 0x015,
+ AMDGPU_DOORBELL_MEC_RING6 = 0x016,
+ AMDGPU_DOORBELL_MEC_RING7 = 0x017,
+ AMDGPU_DOORBELL_GFX_RING0 = 0x020,
+ AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0,
+ AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1,
+ AMDGPU_DOORBELL_IH = 0x1E8,
+ AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
+ AMDGPU_DOORBELL_INVALID = 0xFFFF
+};
+
+enum AMDGPU_VEGA20_DOORBELL_ASSIGNMENT {
+
+ /* Compute + GFX: 0~255 */
+ AMDGPU_VEGA20_DOORBELL_KIQ = 0x000,
+ AMDGPU_VEGA20_DOORBELL_HIQ = 0x001,
+ AMDGPU_VEGA20_DOORBELL_DIQ = 0x002,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING0 = 0x003,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING1 = 0x004,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING2 = 0x005,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING3 = 0x006,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING4 = 0x007,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009,
+ AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A,
+ AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B,
+ AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A,
+ AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B,
+ /* SDMA:256~335*/
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1 = 0x10A,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2 = 0x114,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3 = 0x11E,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4 = 0x128,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5 = 0x132,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6 = 0x13C,
+ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7 = 0x146,
+ /* IH: 376~391 */
+ AMDGPU_VEGA20_DOORBELL_IH = 0x178,
+ /* MMSCH: 392~407
+ * Overlap the doorbell assignment with VCN as they are mutually exclusive.
+ * The VCN engine's doorbells are 32 bit and two VCN rings share one QWORD.
+ */
+ AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VCN0 */
+ AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
+ AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
+ AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,
+
+ AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VCN1 */
+ AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D,
+ AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E,
+ AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F,
+
+ AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188,
+ AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189,
+ AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A,
+ AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7 = 0x18B,
+
+ AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1 = 0x18C,
+ AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D,
+ AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E,
+ AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F,
+
+ AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
+ AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
+
+ /* kiq/kcq from second XCD. Max 8 XCDs */
+ AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START = 0x190,
+ /* 8 compute rings per GC. Max to 0x1CE */
+ AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START = 0x197,
+
+ /* AID1 SDMA: 0x1D0 ~ 0x1F7 */
+ AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START = 0x1D0,
+
+ AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1F7,
+ AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
+};
+
+enum AMDGPU_NAVI10_DOORBELL_ASSIGNMENT {
+
+ /* Compute + GFX: 0~255 */
+ AMDGPU_NAVI10_DOORBELL_KIQ = 0x000,
+ AMDGPU_NAVI10_DOORBELL_HIQ = 0x001,
+ AMDGPU_NAVI10_DOORBELL_DIQ = 0x002,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING0 = 0x003,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING1 = 0x004,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING2 = 0x005,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING3 = 0x006,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING4 = 0x007,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A,
+ AMDGPU_NAVI10_DOORBELL_MES_RING0 = 0x00B,
+ AMDGPU_NAVI10_DOORBELL_MES_RING1 = 0x00C,
+ AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00D,
+ AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
+ AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
+ AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START = 0x08D,
+ AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_END = 0x0FF,
+
+ /* SDMA:256~335*/
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE2 = 0x114,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE3 = 0x11E,
+ /* IH: 376~391 */
+ AMDGPU_NAVI10_DOORBELL_IH = 0x178,
+ /* MMSCH: 392~407
+ * Overlap the doorbell assignment with VCN as they are mutually exclusive.
+ * The VCN engine's doorbells are 32 bit and two VCN rings share one QWORD.
+ */
+ AMDGPU_NAVI10_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
+ AMDGPU_NAVI10_DOORBELL64_VCN2_3 = 0x189,
+ AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A,
+ AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B,
+
+ AMDGPU_NAVI10_DOORBELL64_VCN8_9 = 0x18C,
+ AMDGPU_NAVI10_DOORBELL64_VCNa_b = 0x18D,
+ AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E,
+ AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F,
+
+ AMDGPU_NAVI10_DOORBELL64_VPE = 0x190,
+
+ AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
+ AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VPE,
+
+ AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = AMDGPU_NAVI10_DOORBELL64_VPE,
+ AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF
+};
+
+/*
+ * 64-bit doorbells; offsets are in QWORDs and occupy 2KB of doorbell space
+ */
+enum AMDGPU_DOORBELL64_ASSIGNMENT {
+ /*
+ * All compute related doorbells (kiq, hiq, diq, traditional compute queues and
+ * user queues) should be located in a contiguous range so that programming
+ * CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
+ * Compute related doorbells are allocated from 0x00 to 0x8a.
+ */
+
+
+ /* kernel scheduling */
+ AMDGPU_DOORBELL64_KIQ = 0x00,
+
+ /* HSA interface queue and debug queue */
+ AMDGPU_DOORBELL64_HIQ = 0x01,
+ AMDGPU_DOORBELL64_DIQ = 0x02,
+
+ /* Compute engines */
+ AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
+ AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
+ AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
+ AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
+ AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
+ AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
+ AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
+ AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
+
+ /* User queue doorbell range (128 doorbells) */
+ AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
+ AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
+
+ /* Graphics engine */
+ AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
+
+ /*
+ * Other graphics doorbells can be allocated here: from 0x8c to 0xdf
+ * Graphics voltage island aperture 1
+ * default non-graphics QWORD index is 0xe0 - 0xFF inclusive
+ */
+
+ /* For vega10 SR-IOV, the SDMA doorbells must be fixed as follows
+ * to keep the same settings as the host driver, otherwise
+ * conflicts will occur.
+ */
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
+ AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
+ AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
+
+ /* Interrupt handler */
+ AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
+ AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
+ AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
+
+ /* VCN engine use 32 bits doorbell */
+ AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
+ AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
+ AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
+ AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
+
+ /* Overlap the doorbell assignment with VCN as they are mutually exclusive.
+ * The VCE engine's doorbells are 32 bit and two VCE rings share one QWORD.
+ */
+ AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8,
+ AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9,
+ AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA,
+ AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB,
+
+ AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC,
+ AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD,
+ AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
+ AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
+
+ AMDGPU_DOORBELL64_FIRST_NON_CP = AMDGPU_DOORBELL64_sDMA_ENGINE0,
+ AMDGPU_DOORBELL64_LAST_NON_CP = AMDGPU_DOORBELL64_VCE_RING6_7,
+
+ AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
+ AMDGPU_DOORBELL64_INVALID = 0xFFFF
+};
+
+enum AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
+
+ /* XCC0: 0x00 ~ 0x20, XCC1: 0x20 ~ 0x2F ... */
+
+ /* KIQ/HIQ/DIQ */
+ AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
+ AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001,
+ AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002,
+ /* Compute: 0x08 ~ 0x20 */
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008,
+ AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010,
+ AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F,
+ AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020,
+
+ /* SDMA: 0x100 ~ 0x19F */
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
+ /* IH: 0x1A0 ~ 0x1AF */
+ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
+ /* VCN: 0x1B0 ~ 0x1E8 */
+ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
+ AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8,
+
+ AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
+ AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
+
+ AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8,
+ AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
+};
+
+u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
+void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
+
+/*
+ * GPU doorbell aperture helper functions.
+ */
+int amdgpu_doorbell_init(struct amdgpu_device *adev);
+void amdgpu_doorbell_fini(struct amdgpu_device *adev);
+int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev);
+uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size);
+
+#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
+#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
+#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
+#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
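+
+/*
+ * Note: these wrappers expect a local 'adev' pointer to be in scope at the
+ * call site. An illustrative use from a ring backend would be
+ * WDOORBELL64(ring->doorbell_index, ring->wptr) to ring a 64-bit doorbell.
+ */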
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
new file mode 100644
index 000000000000..3040437d99c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+
+/**
+ * amdgpu_mm_rdoorbell - read a doorbell dword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Returns the value in the doorbell aperture at the
+ * requested doorbell index (CIK).
+ */
+u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ return readl(adev->doorbell.cpu_addr + index);
+
+ dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n",
+ index);
+ return 0;
+}
+
+/**
+ * amdgpu_mm_wdoorbell - write a doorbell dword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Writes @v to the doorbell aperture at the
+ * requested doorbell index (CIK).
+ */
+void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ writel(v, adev->doorbell.cpu_addr + index);
+ else
+ dev_err(adev->dev,
+ "writing beyond doorbell aperture: 0x%08x!\n", index);
+}
+
+/**
+ * amdgpu_mm_rdoorbell64 - read a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ *
+ * Returns the value in the doorbell aperture at the
+ * requested doorbell index (VEGA10+).
+ */
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ return atomic64_read((atomic64_t *)(adev->doorbell.cpu_addr + index));
+
+ dev_err(adev->dev, "reading beyond doorbell aperture: 0x%08x!\n",
+ index);
+ return 0;
+}
+
+/**
+ * amdgpu_mm_wdoorbell64 - write a doorbell Qword
+ *
+ * @adev: amdgpu_device pointer
+ * @index: doorbell index
+ * @v: value to write
+ *
+ * Writes @v to the doorbell aperture at the
+ * requested doorbell index (VEGA10+).
+ */
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
+{
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (index < adev->doorbell.num_kernel_doorbells)
+ atomic64_set((atomic64_t *)(adev->doorbell.cpu_addr + index), v);
+ else
+ dev_err(adev->dev,
+ "writing beyond doorbell aperture: 0x%08x!\n", index);
+}
+
+/**
+ * amdgpu_doorbell_index_on_bar - Find doorbell's absolute offset in BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @db_bo: doorbell object's bo
+ * @doorbell_index: doorbell relative index in this doorbell object
+ * @db_size: doorbell size in bytes
+ *
+ * Returns the doorbell's absolute index in the BAR.
+ */
+uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev,
+ struct amdgpu_bo *db_bo,
+ uint32_t doorbell_index,
+ uint32_t db_size)
+{
+ int db_bo_offset;
+
+ db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo);
+
+ /* The returned index is in dword units, but a doorbell can be 32 bit
+ * or 64 bit wide, so scale doorbell_index by db_size (in bytes) / 4
+ * dwords per doorbell.
+ */
+ return db_bo_offset / sizeof(u32) + doorbell_index *
+ DIV_ROUND_UP(db_size, 4);
+}
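+
+/*
+ * Illustrative example: for a doorbell BO at GPU offset 0x2000 within the
+ * doorbell BAR, doorbell_index = 3 and db_size = 8 (a 64-bit doorbell), the
+ * result is 0x2000 / 4 + 3 * 2 = 2054, i.e. the 2054th dword of the BAR.
+ */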
+
+/**
+ * amdgpu_doorbell_create_kernel_doorbells - Create kernel doorbells for graphics
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Creates doorbells for graphics driver usage.
+ * Returns 0 on success, error otherwise.
+ */
+int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev)
+{
+ int r;
+ int size;
+
+ /* SI HW does not have doorbells, skip allocation */
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return 0;
+
+ /* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */
+ size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE);
+
+ /* Allocate an extra page for MES kernel usages (ring test) */
+ adev->mes.db_start_dw_offset = size / sizeof(u32);
+ size += PAGE_SIZE;
+
+ r = amdgpu_bo_create_kernel(adev,
+ size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &adev->doorbell.kernel_doorbells,
+ NULL,
+ (void **)&adev->doorbell.cpu_addr);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed to allocate kernel doorbells, err=%d\n", r);
+ return r;
+ }
+
+ adev->doorbell.num_kernel_doorbells = size / sizeof(u32);
+ return 0;
+}
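+
+/*
+ * Illustrative sizing, assuming 4 KiB pages: with num_kernel_doorbells = 0x400
+ * (1024 dwords), size = ALIGN(1024 * 4, 4096) = 4096, mes.db_start_dw_offset
+ * becomes 1024, and after adding the extra MES page the allocation is 8192
+ * bytes, so num_kernel_doorbells ends up as 2048.
+ */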
+
+/*
+ * GPU doorbell aperture helper functions.
+ */
+/**
+ * amdgpu_doorbell_init - Init doorbell driver information.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Init doorbell driver information (CIK)
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_doorbell_init(struct amdgpu_device *adev)
+{
+
+ /* No doorbell on SI hardware generation */
+ if (adev->asic_type < CHIP_BONAIRE) {
+ adev->doorbell.base = 0;
+ adev->doorbell.size = 0;
+ adev->doorbell.num_kernel_doorbells = 0;
+ return 0;
+ }
+
+ if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
+ return -EINVAL;
+
+ amdgpu_asic_init_doorbell_index(adev);
+
+ /* doorbell bar mapping */
+ adev->doorbell.base = pci_resource_start(adev->pdev, 2);
+ adev->doorbell.size = pci_resource_len(adev->pdev, 2);
+
+ adev->doorbell.num_kernel_doorbells =
+ min_t(u32, adev->doorbell.size / sizeof(u32),
+ adev->doorbell_index.max_assignment + 1);
+ if (adev->doorbell.num_kernel_doorbells == 0)
+ return -EINVAL;
+
+ /*
+ * For Vega, reserve and map two pages on doorbell BAR since SDMA
+ * paging queue doorbell uses the second page. The
+ * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
+ * doorbells are in the first page. So with the paging queue enabled,
+ * the max num_kernel_doorbells should be increased by one page (0x400 in dwords).
+ */
+ if (adev->asic_type >= CHIP_VEGA10)
+ adev->doorbell.num_kernel_doorbells += 0x400;
+
+ return 0;
+}
+
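To make the sizing above concrete (hypothetical numbers, not from the patch): with a 2 MiB doorbell BAR and max_assignment = 0x3ff, num_kernel_doorbells = min(2 MiB / 4, 0x3ff + 1) = 0x400 dwords; on Vega and later the extra page reserved for the SDMA paging queue then raises this to 0x800 dwords.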
+/**
+ * amdgpu_doorbell_fini - Tear down doorbell driver information.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down doorbell driver information (CIK)
+ */
+void amdgpu_doorbell_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->doorbell.kernel_doorbells,
+ NULL,
+ (void **)&adev->doorbell.cpu_addr);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
new file mode 100644
index 000000000000..2dfbddcef9ab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -0,0 +1,3204 @@
+/*
+ * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/amdgpu_drm.h>
+#include <drm/clients/drm_client_setup.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fbdev_ttm.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_managed.h>
+#include <drm/drm_pciids.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+
+#include <linux/cc_platform.h>
+#include <linux/dynamic_debug.h>
+#include <linux/module.h>
+#include <linux/mmu_notifier.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+#include <linux/vga_switcheroo.h>
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_drv.h"
+#include "amdgpu_fdinfo.h"
+#include "amdgpu_irq.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_sched.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_userq_fence.h"
+#include "../amdxcp/amdgpu_xcp_drv.h"
+
+/*
+ * KMS wrapper.
+ * - 3.0.0 - initial driver
+ * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
+ * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same
+ * at the end of IBs.
+ * - 3.3.0 - Add VM support for UVD on supported hardware.
+ * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
+ * - 3.5.0 - Add support for new UVD_NO_OP register.
+ * - 3.6.0 - KMD uses CONTEXT_CONTROL in the ring buffer.
+ * - 3.7.0 - Add support for VCE clock list packet
+ * - 3.8.0 - Add support raster config init in the kernel
+ * - 3.9.0 - Add support for memory query info about VRAM and GTT.
+ * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags
+ * - 3.11.0 - Add support for sensor query info (clocks, temp, etc).
+ * - 3.12.0 - Add query for double offchip LDS buffers
+ * - 3.13.0 - Add PRT support
+ * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality
+ * - 3.15.0 - Export more gpu info for gfx9
+ * - 3.16.0 - Add reserved vmid support
+ * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS.
+ * - 3.18.0 - Export gpu always on cu bitmap
+ * - 3.19.0 - Add support for UVD MJPEG decode
+ * - 3.20.0 - Add support for local BOs
+ * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
+ * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
+ * - 3.23.0 - Add query for VRAM lost counter
+ * - 3.24.0 - Add high priority compute support for gfx9
+ * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
+ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
+ * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
+ * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
+ * - 3.31.0 - Add support for per-flip tiling attribute changes with DC
+ * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.
+ * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
+ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches
+ * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask
+ * - 3.36.0 - Allow reading more status registers on si/cik
+ * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
+ * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
+ * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
+ * - 3.41.0 - Add video codec query
+ * - 3.42.0 - Add 16bpc fixed point display support
+ * - 3.43.0 - Add device hot plug/unplug support
+ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
+ * - 3.45.0 - Add context ioctl stable pstate interface
+ * - 3.46.0 - To enable hot plug amdgpu tests in libdrm
+ * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
+ * - 3.48.0 - Add IP discovery version info to HW INFO
+ * - 3.49.0 - Add gang submit into CS IOCTL
+ * - 3.50.0 - Update AMDGPU_INFO_DEV_INFO IOCTL for minimum engine and memory clock
+ * Update AMDGPU_INFO_SENSOR IOCTL for PEAK_PSTATE engine and memory clock
+ * - 3.51.0 - Return the PCIe gen and lanes from the INFO ioctl
+ * - 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields:
+ * tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size,
+ * gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi
+ * - 3.53.0 - Support for GFX11 CP GFX shadowing
+ * - 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support
+ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query
+ * - 3.56.0 - Update IB start address and size alignment for decode and encode
+ * - 3.57.0 - Compute tunneling on GFX10+
+ * - 3.58.0 - Add GFX12 DCC support
+ * - 3.59.0 - Cleared VRAM
+ * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement)
+ * - 3.61.0 - Contains fix for RV/PCO compute queues
+ * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT
+ * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size
+ * - 3.64.0 - Userq IP support query
+ */
+#define KMS_DRIVER_MAJOR 3
+#define KMS_DRIVER_MINOR 64
+#define KMS_DRIVER_PATCHLEVEL 0
+
+/*
+ * amdgpu.debug module options. All are disabled by default.
+ */
+enum AMDGPU_DEBUG_MASK {
+ AMDGPU_DEBUG_VM = BIT(0),
+ AMDGPU_DEBUG_LARGEBAR = BIT(1),
+ AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
+ AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
+ AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
+ AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
+ AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
+ AMDGPU_DEBUG_SMU_POOL = BIT(7),
+ AMDGPU_DEBUG_VM_USERPTR = BIT(8),
+ AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
+ AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
+};
+
+unsigned int amdgpu_vram_limit = UINT_MAX;
+int amdgpu_vis_vram_limit;
+int amdgpu_gart_size = -1; /* auto */
+int amdgpu_gtt_size = -1; /* auto */
+int amdgpu_moverate = -1; /* auto */
+int amdgpu_audio = -1;
+int amdgpu_disp_priority;
+int amdgpu_hw_i2c;
+int amdgpu_pcie_gen2 = -1;
+int amdgpu_msi = -1;
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH];
+int amdgpu_dpm = -1;
+int amdgpu_fw_load_type = -1;
+int amdgpu_aspm = -1;
+int amdgpu_runtime_pm = -1;
+uint amdgpu_ip_block_mask = 0xffffffff;
+int amdgpu_bapm = -1;
+int amdgpu_deep_color;
+int amdgpu_vm_size = -1;
+int amdgpu_vm_fragment_size = -1;
+int amdgpu_vm_block_size = -1;
+int amdgpu_vm_fault_stop;
+int amdgpu_vm_update_mode = -1;
+int amdgpu_exp_hw_support;
+int amdgpu_dc = -1;
+int amdgpu_sched_jobs = 32;
+int amdgpu_sched_hw_submission = 2;
+uint amdgpu_pcie_gen_cap;
+uint amdgpu_pcie_lane_cap;
+u64 amdgpu_cg_mask = 0xffffffffffffffff;
+uint amdgpu_pg_mask = 0xffffffff;
+uint amdgpu_sdma_phase_quantum = 32;
+char *amdgpu_disable_cu;
+char *amdgpu_virtual_display;
+int amdgpu_enforce_isolation = -1;
+int amdgpu_modeset = -1;
+
+/* Specifies the default granularity for SVM, used in buffer
+ * migration and restoration of backing memory when handling
+ * recoverable page faults.
+ *
+ * The value is given as log2 of the buffer's page count; for a 2 MiB
+ * buffer (with 4 KiB pages) it computes to 9.
+ */
+uint amdgpu_svm_default_granularity = 9;
+
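A minimal standalone sketch (plain userspace C, assuming 4 KiB pages) showing how the default granularity of 9 follows from a 2 MiB buffer:

#include <stdio.h>

int main(void)
{
	unsigned long buf_bytes = 2UL * 1024 * 1024; /* 2 MiB buffer */
	unsigned long pages = buf_bytes / 4096;      /* 512 pages, assuming 4 KiB pages */
	unsigned int granularity = 0;

	while ((1UL << granularity) < pages)         /* log2 of the page count */
		granularity++;

	printf("granularity = %u\n", granularity);   /* prints 9 */
	return 0;
}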
+/*
+ * OverDrive(bit 14) disabled by default
+ * GFX DCS(bit 19) disabled by default
+ */
+uint amdgpu_pp_feature_mask = 0xfff7bfff;
+uint amdgpu_force_long_training;
+int amdgpu_lbpw = -1;
+int amdgpu_compute_multipipe = -1;
+int amdgpu_gpu_recovery = -1; /* auto */
+int amdgpu_emu_mode;
+uint amdgpu_smu_memory_pool_size;
+int amdgpu_smu_pptable_id = -1;
+/*
+ * FBC (bit 0) disabled by default
+ * MULTI_MON_PP_MCLK_SWITCH (bit 1) enabled by default
+ * - With this, for multiple monitors in sync (e.g. with the same model),
+ * mclk switching will be allowed, and the mclk will not be forced to the
+ * highest level. That helps save some idle power.
+ * DISABLE_FRACTIONAL_PWM (bit 2) disabled by default
+ * PSR (bit 3) disabled by default
+ * EDP NO POWER SEQUENCING (bit 4) disabled by default
+ */
+uint amdgpu_dc_feature_mask = 2;
+uint amdgpu_dc_debug_mask;
+uint amdgpu_dc_visual_confirm;
+int amdgpu_async_gfx_ring = 1;
+int amdgpu_mcbp = -1;
+int amdgpu_discovery = -1;
+int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
+int amdgpu_mes_kiq;
+int amdgpu_uni_mes = 1;
+int amdgpu_noretry = -1;
+int amdgpu_force_asic_type = -1;
+int amdgpu_tmz = -1; /* auto */
+uint amdgpu_freesync_vid_mode;
+int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq = -1;
+int amdgpu_smartshift_bias;
+int amdgpu_use_xgmi_p2p = 1;
+int amdgpu_vcnfw_log;
+int amdgpu_sg_display = -1; /* auto */
+int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+int amdgpu_umsch_mm;
+int amdgpu_seamless = -1; /* auto */
+uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
+int amdgpu_wbrf = -1;
+int amdgpu_damage_clips = -1; /* auto */
+int amdgpu_umsch_mm_fwlog;
+int amdgpu_rebar = -1; /* auto */
+int amdgpu_user_queue = -1;
+
+DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
+ "DRM_UT_CORE",
+ "DRM_UT_DRIVER",
+ "DRM_UT_KMS",
+ "DRM_UT_PRIME",
+ "DRM_UT_ATOMIC",
+ "DRM_UT_VBL",
+ "DRM_UT_STATE",
+ "DRM_UT_LEASE",
+ "DRM_UT_DP",
+ "DRM_UT_DRMRES");
+
+struct amdgpu_mgpu_info mgpu_info = {
+ .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
+};
+int amdgpu_ras_enable = -1;
+uint amdgpu_ras_mask = 0xffffffff;
+int amdgpu_bad_page_threshold = -1;
+struct amdgpu_watchdog_timer amdgpu_watchdog_timer = {
+ .timeout_fatal_disable = false,
+ .period = 0x0, /* default to 0x0 (timeout disable) */
+};
+
+/**
+ * DOC: vramlimit (int)
+ * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM).
+ */
+MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
+module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
+
+/**
+ * DOC: vis_vramlimit (int)
+ * Restrict the amount of CPU visible VRAM in MiB for testing. The default is 0 (Use full CPU visible VRAM).
+ */
+MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
+module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
+
+/**
+ * DOC: gartsize (uint)
+ * Restrict the size of GART (for kernel use) in MiB (32, 64, etc.) for testing.
+ * The default is -1 (The size depends on asic).
+ */
+MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");
+module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
+
+/**
+ * DOC: gttsize (int)
+ * Restrict the size of GTT domain (for userspace use) in MiB for testing.
+ * The default is -1 (Use value specified by TTM).
+ * This parameter is deprecated and will be removed in the future.
+ */
+MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
+module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
+
+/**
+ * DOC: moverate (int)
+ * Set maximum buffer migration rate in MB/s. The default is -1 (8 MB/s).
+ */
+MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)");
+module_param_named(moverate, amdgpu_moverate, int, 0600);
+
+/**
+ * DOC: audio (int)
+ * Set HDMI/DP audio. Only affects non-DC display handling. The default is -1 (enabled); set 0 to disable it.
+ */
+MODULE_PARM_DESC(audio, "HDMI/DP Audio enable for non DC displays (-1 = auto, 0 = disable, 1 = enable)");
+module_param_named(audio, amdgpu_audio, int, 0444);
+
+/**
+ * DOC: disp_priority (int)
+ * Set display Priority (1 = normal, 2 = high). Only affects non-DC display handling. The default is 0 (auto).
+ */
+MODULE_PARM_DESC(disp_priority, "Display Priority (0 = auto, 1 = normal, 2 = high)");
+module_param_named(disp_priority, amdgpu_disp_priority, int, 0444);
+
+/**
+ * DOC: hw_i2c (int)
+ * Enable the hw i2c engine. Only affects non-DC display handling. The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)");
+module_param_named(hw_i2c, amdgpu_hw_i2c, int, 0444);
+
+/**
+ * DOC: pcie_gen2 (int)
+ * Enable or disable PCIE Gen2/3 mode (0 = disable, 1 = enable). The default is -1 (auto, enabled).
+ */
+MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)");
+module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
+
+/**
+ * DOC: msi (int)
+ * Enable or disable Message Signaled Interrupts (MSI) (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
+MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(msi, amdgpu_msi, int, 0444);
+
+/**
+ * DOC: svm_default_granularity (uint)
+ * Used in buffer migration and handling of recoverable page faults
+ */
+MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity as log2(pages), default 9 = 2^9 pages = 2 MiB");
+module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644);
+
+/**
+ * DOC: lockup_timeout (string)
+ * Set GPU scheduler timeout value in ms.
+ *
+ * The format can be [single value] for setting all timeouts at once or
+ * [GFX,Compute,SDMA,Video] to set individual timeouts.
+ * Negative values mean infinity.
+ *
+ * By default (with no lockup_timeout settings), the timeout for all queues is 2000.
+ */
+MODULE_PARM_DESC(lockup_timeout,
+ "GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video].");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout,
+ sizeof(amdgpu_lockup_timeout), 0444);
+
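As an illustration (values chosen arbitrarily): amdgpu.lockup_timeout=10000 on the kernel command line sets a 10000 ms timeout for all queue types, while amdgpu.lockup_timeout=10000,10000,10000,5000 sets GFX, Compute and SDMA to 10000 ms and Video to 5000 ms.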
+/**
+ * DOC: dpm (int)
+ * Override for dynamic power management setting
+ * (0 = disable, 1 = enable)
+ * The default is -1 (auto).
+ */
+MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(dpm, amdgpu_dpm, int, 0444);
+
+/**
+ * DOC: fw_load_type (int)
+ * Set different firmware loading type for debugging, if supported.
+ * Set to 0 to force direct loading if supported by the ASIC. Set
+ * to -1 to select the default loading mode for the ASIC, as defined
+ * by the driver. The default is -1 (auto).
+ */
+MODULE_PARM_DESC(fw_load_type, "firmware loading type (3 = rlc backdoor autoload if supported, 2 = smu load if supported, 1 = psp load, 0 = force direct if supported, -1 = auto)");
+module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
+
+/**
+ * DOC: aspm (int)
+ * Enable or disable ASPM (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
+MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(aspm, amdgpu_aspm, int, 0444);
+
+/**
+ * DOC: runpm (int)
+ * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down
+ * the dGPUs when they are idle if supported. The default is -1 (auto enable).
+ * Setting the value to 0 disables this functionality.
+ * Setting the value to -2 is auto enabled with power down when displays are attached.
+ */
+MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)");
+module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
+
+/**
+ * DOC: ip_block_mask (uint)
+ * Override what IP blocks are enabled on the GPU. Each GPU is a collection of IP blocks (gfx, display, video, etc.).
+ * Use this parameter to disable specific blocks. Note that the IP blocks do not have a fixed index. Some asics may not have
+ * some IPs or may include multiple instances of an IP so the ordering varies from asic to asic. See the driver output in
+ * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
+ */
+MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
+module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
+
+/**
+ * DOC: bapm (int)
+ * Bidirectional Application Power Management (BAPM) used to dynamically share TDP between CPU and GPU. Set value 0 to disable it.
+ * The default -1 (auto, enabled)
+ */
+MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(bapm, amdgpu_bapm, int, 0444);
+
+/**
+ * DOC: deep_color (int)
+ * Set 1 to enable Deep Color support. Only affects non-DC display handling. The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))");
+module_param_named(deep_color, amdgpu_deep_color, int, 0444);
+
+/**
+ * DOC: vm_size (int)
+ * Override the size of the GPU's per client virtual address space in GiB. The default is -1 (automatic for each asic).
+ */
+MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
+module_param_named(vm_size, amdgpu_vm_size, int, 0444);
+
+/**
+ * DOC: vm_fragment_size (int)
+ * Override VM fragment size in bits (4, 5, etc. 4 = 64K, 9 = 2M). The default is -1 (automatic for each asic).
+ */
+MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
+module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
+
+/**
+ * DOC: vm_block_size (int)
+ * Override VM page table size in bits (default depending on vm_size and hw setup). The default is -1 (automatic for each asic).
+ */
+MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
+module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
+
+/**
+ * DOC: vm_fault_stop (int)
+ * Stop on VM fault for debugging (0 = never, 1 = print first, 2 = always). The default is 0 (No stop).
+ */
+MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
+module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
+
+/**
+ * DOC: vm_update_mode (int)
+ * Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default
+ * is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never).
+ */
+MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both)");
+module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
+
+/**
+ * DOC: exp_hw_support (int)
+ * Enable experimental hw support (1 = enable). The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
+module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+
+/**
+ * DOC: dc (int)
+ * Disable/Enable Display Core driver for debugging (1 = enable, 0 = disable). The default is -1 (automatic for each asic).
+ */
+MODULE_PARM_DESC(dc, "Display Core driver (1 = enable, 0 = disable, -1 = auto (default))");
+module_param_named(dc, amdgpu_dc, int, 0444);
+
+/**
+ * DOC: sched_jobs (int)
+ * Override the max number of jobs supported in the sw queue. The default is 32.
+ */
+MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
+module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
+
+/**
+ * DOC: sched_hw_submission (int)
+ * Override the max number of HW submissions. The default is 2.
+ */
+MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
+module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
+
+/**
+ * DOC: ppfeaturemask (hexint)
+ * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * The default is the current set of stable power features.
+ */
+MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default)");
+module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, hexint, 0444);
+
+/**
+ * DOC: forcelongtraining (uint)
+ * Force long memory training during resume.
+ * The default is zero, which indicates short training during resume.
+ */
+MODULE_PARM_DESC(forcelongtraining, "force memory long training");
+module_param_named(forcelongtraining, amdgpu_force_long_training, uint, 0444);
+
+/**
+ * DOC: pcie_gen_cap (uint)
+ * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+ * The default is 0 (automatic for each asic).
+ */
+MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
+module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
+
+/**
+ * DOC: pcie_lane_cap (uint)
+ * Override PCIE lanes capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+ * The default is 0 (automatic for each asic).
+ */
+MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
+module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+
+/**
+ * DOC: cg_mask (ullong)
+ * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled).
+ */
+MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
+module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444);
+
+/**
+ * DOC: pg_mask (uint)
+ * Override Powergating features enabled on GPU (0 = disable power gating). See the AMD_PG_SUPPORT flags in
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ */
+MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
+module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+
+/**
+ * DOC: sdma_phase_quantum (uint)
+ * Override SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change). The default is 32.
+ */
+MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
+module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
+
+/**
+ * DOC: disable_cu (charp)
+ * Set to disable CUs (the format is se.sh.cu,...). The default is NULL.
+ */
+MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
+module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
+
+/**
+ * DOC: virtual_display (charp)
+ * Set to enable virtual display feature. This feature provides a virtual display hardware on headless boards
+ * or in virtualized environments. It will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x. It's the pci address of
+ * the device, plus the number of crtcs to expose. E.g., 0000:26:00.0,4 would enable 4 virtual crtcs on the pci
+ * device at 26:00.0. The default is NULL.
+ */
+MODULE_PARM_DESC(virtual_display,
+ "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
+module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
+
+/**
+ * DOC: lbpw (int)
+ * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
+MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(lbpw, amdgpu_lbpw, int, 0444);
+
+MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
+module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
+
+/**
+ * DOC: gpu_recovery (int)
+ * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
+ */
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
+module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+
+/**
+ * DOC: emu_mode (int)
+ * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
+module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444);
+
+/**
+ * DOC: ras_enable (int)
+ * Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default))
+ */
+MODULE_PARM_DESC(ras_enable, "Enable RAS features on the GPU (0 = disable, 1 = enable, -1 = auto (default))");
+module_param_named(ras_enable, amdgpu_ras_enable, int, 0444);
+
+/**
+ * DOC: ras_mask (uint)
+ * Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1
+ * See the flags in drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+ */
+MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff), only valid when ras_enable == 1");
+module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444);
+
+/**
+ * DOC: timeout_fatal_disable (bool)
+ * Disable Watchdog timeout fatal error event
+ */
+MODULE_PARM_DESC(timeout_fatal_disable, "disable watchdog timeout fatal error (false = default)");
+module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_disable, bool, 0644);
+
+/**
+ * DOC: timeout_period (uint)
+ * Modify the watchdog timeout max_cycles as (1 << period)
+ */
+MODULE_PARM_DESC(timeout_period, "watchdog timeout period (0 = timeout disabled, 1 ~ 0x23 = timeout maxcycles = (1 << period))");
+module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
+
+/**
+ * DOC: si_support (int)
+ * 1 = enabled, 0 = disabled, -1 = default
+ *
+ * SI (Southern Islands) are first generation GCN GPUs, supported by both
+ * drivers: radeon (old) and amdgpu (new). This parameter controls whether
+ * amdgpu should support SI.
+ * By default, SI dedicated GPUs are supported by amdgpu.
+ * Only relevant when CONFIG_DRM_AMDGPU_SI is enabled to build SI support in amdgpu.
+ * See also radeon.si_support which should be disabled when amdgpu.si_support is
+ * enabled, and vice versa.
+ */
+int amdgpu_si_support = -1;
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled, -1 = default)");
+module_param_named(si_support, amdgpu_si_support, int, 0444);
+#endif
+
+/**
+ * DOC: cik_support (int)
+ * 1 = enabled, 0 = disabled, -1 = default
+ *
+ * CIK (Sea Islands) are second generation GCN GPUs, supported by both
+ * drivers: radeon (old) and amdgpu (new). This parameter controls whether
+ * amdgpu should support CIK.
+ * By default:
+ * - CIK dedicated GPUs are supported by amdgpu.
+ * - CIK APUs are supported by radeon (except when radeon is not built).
+ * Only relevant when CONFIG_DRM_AMDGPU_CIK is enabled to build CIK support in amdgpu.
+ * See also radeon.cik_support which should be disabled when amdgpu.cik_support is
+ * enabled, and vice versa.
+ */
+int amdgpu_cik_support = -1;
+#ifdef CONFIG_DRM_AMDGPU_CIK
+MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled, -1 = default)");
+module_param_named(cik_support, amdgpu_cik_support, int, 0444);
+#endif
+
+/**
+ * DOC: smu_memory_pool_size (uint)
+ * It is used to reserve GTT for SMU debug usage; set the value to 0 to disable it. The actual size is value * 256 MiB.
+ * E.g. 0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte. The default is 0 (disabled).
+ */
+MODULE_PARM_DESC(smu_memory_pool_size,
+ "reserve gtt for smu debug usage, 0 = disable,0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
+module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
+
+/**
+ * DOC: async_gfx_ring (int)
+ * It is used to enable gfx rings that could be configured with different priorities or equal priorities
+ */
+MODULE_PARM_DESC(async_gfx_ring,
+ "Asynchronous GFX rings that could be configured with either different priorities (HP3D ring and LP3D ring), or equal priorities (0 = disabled, 1 = enabled (default))");
+module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444);
+
+/**
+ * DOC: mcbp (int)
+ * It is used to enable mid command buffer preemption. (0 = disabled, 1 = enabled, -1 auto (default))
+ */
+MODULE_PARM_DESC(mcbp,
+ "Enable Mid-command buffer preemption (0 = disabled, 1 = enabled, -1 = auto (default))");
+module_param_named(mcbp, amdgpu_mcbp, int, 0444);
+
+/**
+ * DOC: discovery (int)
+ * Allow driver to discover hardware IP information from IP Discovery table at the top of VRAM.
+ * (-1 = auto (default), 0 = disabled, 1 = enabled, 2 = use ip_discovery table from file)
+ */
+MODULE_PARM_DESC(discovery,
+ "Allow driver to discover hardware IPs from IP Discovery table at the top of VRAM");
+module_param_named(discovery, amdgpu_discovery, int, 0444);
+
+/**
+ * DOC: mes (int)
+ * Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes,
+ "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
+module_param_named(mes, amdgpu_mes, int, 0444);
+
+/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+ "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
+/**
+ * DOC: mes_kiq (int)
+ * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_kiq,
+ "Enable Micro Engine Scheduler KIQ (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444);
+
+/**
+ * DOC: uni_mes (int)
+ * Enable Unified Micro Engine Scheduler. This is a new engine pipe for unified scheduler.
+ * (0 = disabled, 1 = enabled (default))
+ */
+MODULE_PARM_DESC(uni_mes,
+ "Enable Unified Micro Engine Scheduler (0 = disabled, 1 = enabled (default))");
+module_param_named(uni_mes, amdgpu_uni_mes, int, 0444);
+
+/**
+ * DOC: noretry (int)
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
+ * (0 = retry enabled, 1 = retry disabled, -1 auto (default))
+ */
+MODULE_PARM_DESC(noretry,
+ "Disable retry faults (0 = retry enabled, 1 = retry disabled, -1 auto (default))");
+module_param_named(noretry, amdgpu_noretry, int, 0644);
+
+/**
+ * DOC: force_asic_type (int)
+ * A non-negative value used to specify the asic type for all supported GPUs.
+ */
+MODULE_PARM_DESC(force_asic_type,
+ "A non-negative value used to specify the asic type for all supported GPUs");
+module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444);
+
+/**
+ * DOC: use_xgmi_p2p (int)
+ * Enables/disables XGMI P2P interface (0 = disable, 1 = enable).
+ */
+MODULE_PARM_DESC(use_xgmi_p2p,
+ "Enable XGMI P2P interface (0 = disable; 1 = enable (default))");
+module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444);
+
+
+#ifdef CONFIG_HSA_AMD
+/**
+ * DOC: sched_policy (int)
+ * Set scheduling policy. Default is HWS(hardware scheduling) with over-subscription.
+ * Setting 1 disables over-subscription. Setting 2 disables HWS and statically
+ * assigns queues to HQDs.
+ */
+int sched_policy = KFD_SCHED_POLICY_HWS;
+module_param_unsafe(sched_policy, int, 0444);
+MODULE_PARM_DESC(sched_policy,
+ "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only))");
+
+/**
+ * DOC: hws_max_conc_proc (int)
+ * Maximum number of processes that HWS can schedule concurrently. The maximum is the
+ * number of VMIDs assigned to the HWS, which is also the default.
+ */
+int hws_max_conc_proc = -1;
+module_param(hws_max_conc_proc, int, 0444);
+MODULE_PARM_DESC(hws_max_conc_proc,
+ "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+
+/**
+ * DOC: cwsr_enable (int)
+ * CWSR(compute wave store and resume) allows the GPU to preempt shader execution in
+ * the middle of a compute wave. Default is 1 to enable this feature. Setting 0
+ * disables it.
+ */
+int cwsr_enable = 1;
+module_param(cwsr_enable, int, 0444);
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+
+/**
+ * DOC: max_num_of_queues_per_device (int)
+ * Maximum number of queues per device. Valid setting is between 1 and 4096. Default
+ * is 4096.
+ */
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+ "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
+
+/**
+ * DOC: send_sigterm (int)
+ * Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm
+ * but just print errors on dmesg. Setting 1 enables sending sigterm.
+ */
+int send_sigterm;
+module_param(send_sigterm, int, 0444);
+MODULE_PARM_DESC(send_sigterm,
+ "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
+
+/**
+ * DOC: halt_if_hws_hang (int)
+ * Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
+ * Setting 1 enables halt on hang.
+ */
+int halt_if_hws_hang;
+module_param_unsafe(halt_if_hws_hang, int, 0644);
+MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
+/**
+ * DOC: hws_gws_support(bool)
+ * Assume that HWS supports GWS barriers regardless of what firmware version
+ * check says. Default value: false (rely on MEC2 firmware version check).
+ */
+bool hws_gws_support;
+module_param_unsafe(hws_gws_support, bool, 0444);
+MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
+
+/**
+ * DOC: queue_preemption_timeout_ms (int)
+ * queue preemption timeout in ms (1 = Minimum, 9000 = default)
+ */
+int queue_preemption_timeout_ms = 9000;
+module_param(queue_preemption_timeout_ms, int, 0644);
+MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
+
+/**
+ * DOC: debug_evictions(bool)
+ * Enable extra debug messages to help determine the cause of evictions
+ */
+bool debug_evictions;
+module_param(debug_evictions, bool, 0644);
+MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
+
+/**
+ * DOC: no_system_mem_limit(bool)
+ * Disable system memory limit, to support multiple process shared memory
+ */
+bool no_system_mem_limit;
+module_param(no_system_mem_limit, bool, 0644);
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+
+/**
+ * DOC: no_queue_eviction_on_vm_fault (int)
+ * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction).
+ */
+int amdgpu_no_queue_eviction_on_vm_fault;
+MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
+module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
+#endif
+
+/**
+ * DOC: mtype_local (int)
+ */
+int amdgpu_mtype_local;
+MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
+module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444);
+
+/**
+ * DOC: pcie_p2p (bool)
+ * Enable PCIe P2P (requires large-BAR). Default value: true (on)
+ */
+#ifdef CONFIG_HSA_AMD_P2P
+bool pcie_p2p = true;
+module_param(pcie_p2p, bool, 0444);
+MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))");
+#endif
+
+/**
+ * DOC: dcfeaturemask (uint)
+ * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * The default is the current set of stable display features.
+ */
+MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default)");
+module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
+
+/**
+ * DOC: dcdebugmask (uint)
+ * Display debug options. See enum DC_DEBUG_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ */
+MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default)");
+module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
+
+MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)");
+module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444);
+
+/**
+ * DOC: abmlevel (uint)
+ * Override the default ABM (Adaptive Backlight Management) level used for DC
+ * enabled hardware. Requires DMCU to be supported and loaded.
+ * Valid levels are 0-4. A value of 0 indicates that ABM should be disabled by
+ * default. Values 1-4 control the maximum allowable brightness reduction via
+ * the ABM algorithm, with 1 being the least reduction and 4 being the most
+ * reduction.
+ *
+ * Defaults to -1, or auto. Userspace can only override this level after
+ * boot if it's set to auto.
+ */
+int amdgpu_dm_abm_level = -1;
+MODULE_PARM_DESC(abmlevel,
+ "ABM level (0 = off, 1-4 = backlight reduction level, -1 auto (default))");
+module_param_named(abmlevel, amdgpu_dm_abm_level, int, 0444);
+
+int amdgpu_backlight = -1;
+MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))");
+module_param_named(backlight, amdgpu_backlight, bint, 0444);
+
+/**
+ * DOC: damageclips (int)
+ * Enable or disable damage clips support. If damage clips support is disabled,
+ * we will force full frame updates, irrespective of what user space sends to
+ * us.
+ *
+ * Defaults to -1 (where it is enabled unless a PSR-SU display is detected).
+ */
+MODULE_PARM_DESC(damageclips,
+ "Damage clips support (0 = disable, 1 = enable, -1 auto (default))");
+module_param_named(damageclips, amdgpu_damage_clips, int, 0444);
+
+/**
+ * DOC: tmz (int)
+ * Trusted Memory Zone (TMZ) is a method to protect data being written
+ * to or read from memory.
+ *
+ * The default is -1 (auto).
+ */
+MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)");
+module_param_named(tmz, amdgpu_tmz, int, 0444);
+
+/**
+ * DOC: freesync_video (uint)
+ * Enable the optimization to adjust front porch timing to achieve seamless
+ * mode change experience when setting a freesync supported mode for which full
+ * modeset is not needed.
+ *
+ * The Display Core will add a set of modes derived from the base FreeSync
+ * video mode into the corresponding connector's mode list based on commonly
+ * used refresh rates and VRR range of the connected display, when users enable
+ * this feature. From the userspace perspective, they can see a seamless mode
+ * change experience when the change between different refresh rates under the
+ * same resolution. Additionally, userspace applications such as Video playback
+ * can read this modeset list and change the refresh rate based on the video
+ * frame rate. Finally, the userspace can also derive an appropriate mode for a
+ * particular refresh rate based on the FreeSync Mode and add it to the
+ * connector's mode list.
+ *
+ * Note: This is an experimental feature.
+ *
+ * The default value: 0 (off).
+ */
+MODULE_PARM_DESC(
+ freesync_video,
+ "Adds additional modes via VRR for refresh changes without a full modeset (0 = off (default), 1 = on)");
+module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
+
+/**
+ * DOC: reset_method (int)
+ * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
+ */
+MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
+module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644);
+
+/**
+ * DOC: bad_page_threshold (int)
+ * Bad page threshold specifies the threshold value of faulty pages
+ * detected by RAS ECC; the GPU may enter a bad status when the total
+ * number of faulty pages detected by ECC exceeds the threshold value.
+ */
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records, user-defined threshold)");
+module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
+
+MODULE_PARM_DESC(num_kcq, "number of kernel compute queues the user wants to set up (defaults to 8 if set greater than 8 or less than 0; only affects gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+
+/**
+ * DOC: vcnfw_log (int)
+ * Enable vcnfw log output for debugging; the default is disabled.
+ */
+MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log(0 = disable (default value), 1 = enable)");
+module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444);
+
+/**
+ * DOC: sg_display (int)
+ * Disable S/G (scatter/gather) display (i.e., display from system memory).
+ * This option is only relevant on APUs. Set this option to 0 to disable
+ * S/G display if you experience flickering or other issues under memory
+ * pressure and report the issue.
+ */
+MODULE_PARM_DESC(sg_display, "S/G Display (-1 = auto (default), 0 = disable)");
+module_param_named(sg_display, amdgpu_sg_display, int, 0444);
+
+/**
+ * DOC: umsch_mm (int)
+ * Enable Multi Media User Mode Scheduler. This is a HW scheduling engine for VCN and VPE.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(umsch_mm,
+ "Enable Multi Media User Mode Scheduler (0 = disabled (default), 1 = enabled)");
+module_param_named(umsch_mm, amdgpu_umsch_mm, int, 0444);
+
+/**
+ * DOC: umsch_mm_fwlog (int)
+ * Enable umschfw log output for debugging; the default is disabled.
+ */
+MODULE_PARM_DESC(umsch_mm_fwlog, "Enable umschfw log(0 = disable (default value), 1 = enable)");
+module_param_named(umsch_mm_fwlog, amdgpu_umsch_mm_fwlog, int, 0444);
+
+/**
+ * DOC: smu_pptable_id (int)
+ * Used to override the pptable id. id = 0 uses the VBIOS pptable;
+ * id > 0 uses the soft pptable with the specified id.
+ */
+MODULE_PARM_DESC(smu_pptable_id,
+ "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
+module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
+
+/**
+ * DOC: partition_mode (int)
+ * Used to override the default SPX mode.
+ */
+MODULE_PARM_DESC(
+ user_partt_mode,
+ "specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \
+ 0 = AMDGPU_SPX_PARTITION_MODE, \
+ 1 = AMDGPU_DPX_PARTITION_MODE, \
+ 2 = AMDGPU_TPX_PARTITION_MODE, \
+ 3 = AMDGPU_QPX_PARTITION_MODE, \
+ 4 = AMDGPU_CPX_PARTITION_MODE)");
+module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
+
+
+/**
+ * DOC: enforce_isolation (int)
+ * enforce process isolation between graphics and compute.
+ * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)
+ */
+module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444);
+MODULE_PARM_DESC(enforce_isolation,
+"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)");
+
+/**
+ * DOC: modeset (int)
+ * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto).
+ */
+MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)");
+module_param_named(modeset, amdgpu_modeset, int, 0444);
+
+/**
+ * DOC: seamless (int)
+ * Seamless boot will keep the image on the screen during the boot process.
+ */
+MODULE_PARM_DESC(seamless, "Seamless boot (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(seamless, amdgpu_seamless, int, 0444);
+
+/**
+ * DOC: debug_mask (uint)
+ * Debug options for amdgpu, work as a binary mask with the following options:
+ *
+ * - 0x1: Debug VM handling
+ * - 0x2: Enable simulating large-bar capability on non-large bar system. This
+ * limits the VRAM size reported to ROCm applications to the visible
+ * size, usually 256MB.
+ * - 0x4: Disable GPU soft recovery, always do a full reset
+ * - 0x8: Use VRAM for firmware loading
+ * - 0x10: Enable ACA based RAS logging
+ * - 0x20: Enable experimental resets
+ * - 0x40: Disable ring resets
+ * - 0x80: Use VRAM for SMU pool
+ */
+MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
+module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
+
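A small standalone sketch (plain userspace C, illustrative only) of how the bits listed above combine into a debug_mask value:

#include <stdio.h>

int main(void)
{
	/* 0x1 = debug VM handling, 0x4 = disable GPU soft recovery,
	 * per the bit list documented above.
	 */
	unsigned int mask = 0x1 | 0x4;

	printf("amdgpu.debug_mask=0x%x\n", mask); /* prints 0x5 */
	return 0;
}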
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture. This provides an aperture in the GPU's internal
+ * address space for direct access to system memory. Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
+/**
+ * DOC: wbrf (int)
+ * Enable Wifi RFI interference mitigation feature.
+ * Due to electrical and mechanical constraints there may be interference from
+ * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio
+ * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference,
+ * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based
+ * on active list of frequencies in-use (to be avoided) as part of initial setting or
+ * P-state transition. However, there may be potential performance impact with this
+ * feature enabled.
+ * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported))
+ */
+MODULE_PARM_DESC(wbrf,
+ "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto (default))");
+module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+
+/**
+ * DOC: rebar (int)
+ * Allow BAR resizing. Disable this to prevent the driver from attempting
+ * to resize the BAR if the GPU supports it and there is available MMIO space.
+ * Note that this just prevents the driver from resizing the BAR. The BIOS
+ * may have already resized the BAR at boot time.
+ */
+MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(rebar, amdgpu_rebar, int, 0444);
+
+/**
+ * DOC: user_queue (int)
+ * Enable user queues on systems that support user queues. Possible values:
+ *
+ * - -1 = auto (ASIC specific default)
+ * - 0 = user queues disabled
+ * - 1 = user queues enabled and kernel queues enabled (if supported)
+ * - 2 = user queues enabled and kernel queues disabled
+ */
+MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)");
+module_param_named(user_queue, amdgpu_user_queue, int, 0444);
+
+/* These devices are not supported by amdgpu.
+ * They are supported by the mach64, r128, radeon drivers
+ */
+static const u16 amdgpu_unsupported_pciidlist[] = {
+ /* mach64 */
+ 0x4354,
+ 0x4358,
+ 0x4554,
+ 0x4742,
+ 0x4744,
+ 0x4749,
+ 0x474C,
+ 0x474D,
+ 0x474E,
+ 0x474F,
+ 0x4750,
+ 0x4751,
+ 0x4752,
+ 0x4753,
+ 0x4754,
+ 0x4755,
+ 0x4756,
+ 0x4757,
+ 0x4758,
+ 0x4759,
+ 0x475A,
+ 0x4C42,
+ 0x4C44,
+ 0x4C47,
+ 0x4C49,
+ 0x4C4D,
+ 0x4C4E,
+ 0x4C50,
+ 0x4C51,
+ 0x4C52,
+ 0x4C53,
+ 0x5654,
+ 0x5655,
+ 0x5656,
+ /* r128 */
+ 0x4c45,
+ 0x4c46,
+ 0x4d46,
+ 0x4d4c,
+ 0x5041,
+ 0x5042,
+ 0x5043,
+ 0x5044,
+ 0x5045,
+ 0x5046,
+ 0x5047,
+ 0x5048,
+ 0x5049,
+ 0x504A,
+ 0x504B,
+ 0x504C,
+ 0x504D,
+ 0x504E,
+ 0x504F,
+ 0x5050,
+ 0x5051,
+ 0x5052,
+ 0x5053,
+ 0x5054,
+ 0x5055,
+ 0x5056,
+ 0x5057,
+ 0x5058,
+ 0x5245,
+ 0x5246,
+ 0x5247,
+ 0x524b,
+ 0x524c,
+ 0x534d,
+ 0x5446,
+ 0x544C,
+ 0x5452,
+ /* radeon */
+ 0x3150,
+ 0x3151,
+ 0x3152,
+ 0x3154,
+ 0x3155,
+ 0x3E50,
+ 0x3E54,
+ 0x4136,
+ 0x4137,
+ 0x4144,
+ 0x4145,
+ 0x4146,
+ 0x4147,
+ 0x4148,
+ 0x4149,
+ 0x414A,
+ 0x414B,
+ 0x4150,
+ 0x4151,
+ 0x4152,
+ 0x4153,
+ 0x4154,
+ 0x4155,
+ 0x4156,
+ 0x4237,
+ 0x4242,
+ 0x4336,
+ 0x4337,
+ 0x4437,
+ 0x4966,
+ 0x4967,
+ 0x4A48,
+ 0x4A49,
+ 0x4A4A,
+ 0x4A4B,
+ 0x4A4C,
+ 0x4A4D,
+ 0x4A4E,
+ 0x4A4F,
+ 0x4A50,
+ 0x4A54,
+ 0x4B48,
+ 0x4B49,
+ 0x4B4A,
+ 0x4B4B,
+ 0x4B4C,
+ 0x4C57,
+ 0x4C58,
+ 0x4C59,
+ 0x4C5A,
+ 0x4C64,
+ 0x4C66,
+ 0x4C67,
+ 0x4E44,
+ 0x4E45,
+ 0x4E46,
+ 0x4E47,
+ 0x4E48,
+ 0x4E49,
+ 0x4E4A,
+ 0x4E4B,
+ 0x4E50,
+ 0x4E51,
+ 0x4E52,
+ 0x4E53,
+ 0x4E54,
+ 0x4E56,
+ 0x5144,
+ 0x5145,
+ 0x5146,
+ 0x5147,
+ 0x5148,
+ 0x514C,
+ 0x514D,
+ 0x5157,
+ 0x5158,
+ 0x5159,
+ 0x515A,
+ 0x515E,
+ 0x5460,
+ 0x5462,
+ 0x5464,
+ 0x5548,
+ 0x5549,
+ 0x554A,
+ 0x554B,
+ 0x554C,
+ 0x554D,
+ 0x554E,
+ 0x554F,
+ 0x5550,
+ 0x5551,
+ 0x5552,
+ 0x5554,
+ 0x564A,
+ 0x564B,
+ 0x564F,
+ 0x5652,
+ 0x5653,
+ 0x5657,
+ 0x5834,
+ 0x5835,
+ 0x5954,
+ 0x5955,
+ 0x5974,
+ 0x5975,
+ 0x5960,
+ 0x5961,
+ 0x5962,
+ 0x5964,
+ 0x5965,
+ 0x5969,
+ 0x5a41,
+ 0x5a42,
+ 0x5a61,
+ 0x5a62,
+ 0x5b60,
+ 0x5b62,
+ 0x5b63,
+ 0x5b64,
+ 0x5b65,
+ 0x5c61,
+ 0x5c63,
+ 0x5d48,
+ 0x5d49,
+ 0x5d4a,
+ 0x5d4c,
+ 0x5d4d,
+ 0x5d4e,
+ 0x5d4f,
+ 0x5d50,
+ 0x5d52,
+ 0x5d57,
+ 0x5e48,
+ 0x5e4a,
+ 0x5e4b,
+ 0x5e4c,
+ 0x5e4d,
+ 0x5e4f,
+ 0x6700,
+ 0x6701,
+ 0x6702,
+ 0x6703,
+ 0x6704,
+ 0x6705,
+ 0x6706,
+ 0x6707,
+ 0x6708,
+ 0x6709,
+ 0x6718,
+ 0x6719,
+ 0x671c,
+ 0x671d,
+ 0x671f,
+ 0x6720,
+ 0x6721,
+ 0x6722,
+ 0x6723,
+ 0x6724,
+ 0x6725,
+ 0x6726,
+ 0x6727,
+ 0x6728,
+ 0x6729,
+ 0x6738,
+ 0x6739,
+ 0x673e,
+ 0x6740,
+ 0x6741,
+ 0x6742,
+ 0x6743,
+ 0x6744,
+ 0x6745,
+ 0x6746,
+ 0x6747,
+ 0x6748,
+ 0x6749,
+ 0x674A,
+ 0x6750,
+ 0x6751,
+ 0x6758,
+ 0x6759,
+ 0x675B,
+ 0x675D,
+ 0x675F,
+ 0x6760,
+ 0x6761,
+ 0x6762,
+ 0x6763,
+ 0x6764,
+ 0x6765,
+ 0x6766,
+ 0x6767,
+ 0x6768,
+ 0x6770,
+ 0x6771,
+ 0x6772,
+ 0x6778,
+ 0x6779,
+ 0x677B,
+ 0x6840,
+ 0x6841,
+ 0x6842,
+ 0x6843,
+ 0x6849,
+ 0x684C,
+ 0x6850,
+ 0x6858,
+ 0x6859,
+ 0x6880,
+ 0x6888,
+ 0x6889,
+ 0x688A,
+ 0x688C,
+ 0x688D,
+ 0x6898,
+ 0x6899,
+ 0x689b,
+ 0x689c,
+ 0x689d,
+ 0x689e,
+ 0x68a0,
+ 0x68a1,
+ 0x68a8,
+ 0x68a9,
+ 0x68b0,
+ 0x68b8,
+ 0x68b9,
+ 0x68ba,
+ 0x68be,
+ 0x68bf,
+ 0x68c0,
+ 0x68c1,
+ 0x68c7,
+ 0x68c8,
+ 0x68c9,
+ 0x68d8,
+ 0x68d9,
+ 0x68da,
+ 0x68de,
+ 0x68e0,
+ 0x68e1,
+ 0x68e4,
+ 0x68e5,
+ 0x68e8,
+ 0x68e9,
+ 0x68f1,
+ 0x68f2,
+ 0x68f8,
+ 0x68f9,
+ 0x68fa,
+ 0x68fe,
+ 0x7100,
+ 0x7101,
+ 0x7102,
+ 0x7103,
+ 0x7104,
+ 0x7105,
+ 0x7106,
+ 0x7108,
+ 0x7109,
+ 0x710A,
+ 0x710B,
+ 0x710C,
+ 0x710E,
+ 0x710F,
+ 0x7140,
+ 0x7141,
+ 0x7142,
+ 0x7143,
+ 0x7144,
+ 0x7145,
+ 0x7146,
+ 0x7147,
+ 0x7149,
+ 0x714A,
+ 0x714B,
+ 0x714C,
+ 0x714D,
+ 0x714E,
+ 0x714F,
+ 0x7151,
+ 0x7152,
+ 0x7153,
+ 0x715E,
+ 0x715F,
+ 0x7180,
+ 0x7181,
+ 0x7183,
+ 0x7186,
+ 0x7187,
+ 0x7188,
+ 0x718A,
+ 0x718B,
+ 0x718C,
+ 0x718D,
+ 0x718F,
+ 0x7193,
+ 0x7196,
+ 0x719B,
+ 0x719F,
+ 0x71C0,
+ 0x71C1,
+ 0x71C2,
+ 0x71C3,
+ 0x71C4,
+ 0x71C5,
+ 0x71C6,
+ 0x71C7,
+ 0x71CD,
+ 0x71CE,
+ 0x71D2,
+ 0x71D4,
+ 0x71D5,
+ 0x71D6,
+ 0x71DA,
+ 0x71DE,
+ 0x7200,
+ 0x7210,
+ 0x7211,
+ 0x7240,
+ 0x7243,
+ 0x7244,
+ 0x7245,
+ 0x7246,
+ 0x7247,
+ 0x7248,
+ 0x7249,
+ 0x724A,
+ 0x724B,
+ 0x724C,
+ 0x724D,
+ 0x724E,
+ 0x724F,
+ 0x7280,
+ 0x7281,
+ 0x7283,
+ 0x7284,
+ 0x7287,
+ 0x7288,
+ 0x7289,
+ 0x728B,
+ 0x728C,
+ 0x7290,
+ 0x7291,
+ 0x7293,
+ 0x7297,
+ 0x7834,
+ 0x7835,
+ 0x791e,
+ 0x791f,
+ 0x793f,
+ 0x7941,
+ 0x7942,
+ 0x796c,
+ 0x796d,
+ 0x796e,
+ 0x796f,
+ 0x9400,
+ 0x9401,
+ 0x9402,
+ 0x9403,
+ 0x9405,
+ 0x940A,
+ 0x940B,
+ 0x940F,
+ 0x94A0,
+ 0x94A1,
+ 0x94A3,
+ 0x94B1,
+ 0x94B3,
+ 0x94B4,
+ 0x94B5,
+ 0x94B9,
+ 0x9440,
+ 0x9441,
+ 0x9442,
+ 0x9443,
+ 0x9444,
+ 0x9446,
+ 0x944A,
+ 0x944B,
+ 0x944C,
+ 0x944E,
+ 0x9450,
+ 0x9452,
+ 0x9456,
+ 0x945A,
+ 0x945B,
+ 0x945E,
+ 0x9460,
+ 0x9462,
+ 0x946A,
+ 0x946B,
+ 0x947A,
+ 0x947B,
+ 0x9480,
+ 0x9487,
+ 0x9488,
+ 0x9489,
+ 0x948A,
+ 0x948F,
+ 0x9490,
+ 0x9491,
+ 0x9495,
+ 0x9498,
+ 0x949C,
+ 0x949E,
+ 0x949F,
+ 0x94C0,
+ 0x94C1,
+ 0x94C3,
+ 0x94C4,
+ 0x94C5,
+ 0x94C6,
+ 0x94C7,
+ 0x94C8,
+ 0x94C9,
+ 0x94CB,
+ 0x94CC,
+ 0x94CD,
+ 0x9500,
+ 0x9501,
+ 0x9504,
+ 0x9505,
+ 0x9506,
+ 0x9507,
+ 0x9508,
+ 0x9509,
+ 0x950F,
+ 0x9511,
+ 0x9515,
+ 0x9517,
+ 0x9519,
+ 0x9540,
+ 0x9541,
+ 0x9542,
+ 0x954E,
+ 0x954F,
+ 0x9552,
+ 0x9553,
+ 0x9555,
+ 0x9557,
+ 0x955f,
+ 0x9580,
+ 0x9581,
+ 0x9583,
+ 0x9586,
+ 0x9587,
+ 0x9588,
+ 0x9589,
+ 0x958A,
+ 0x958B,
+ 0x958C,
+ 0x958D,
+ 0x958E,
+ 0x958F,
+ 0x9590,
+ 0x9591,
+ 0x9593,
+ 0x9595,
+ 0x9596,
+ 0x9597,
+ 0x9598,
+ 0x9599,
+ 0x959B,
+ 0x95C0,
+ 0x95C2,
+ 0x95C4,
+ 0x95C5,
+ 0x95C6,
+ 0x95C7,
+ 0x95C9,
+ 0x95CC,
+ 0x95CD,
+ 0x95CE,
+ 0x95CF,
+ 0x9610,
+ 0x9611,
+ 0x9612,
+ 0x9613,
+ 0x9614,
+ 0x9615,
+ 0x9616,
+ 0x9640,
+ 0x9641,
+ 0x9642,
+ 0x9643,
+ 0x9644,
+ 0x9645,
+ 0x9647,
+ 0x9648,
+ 0x9649,
+ 0x964a,
+ 0x964b,
+ 0x964c,
+ 0x964e,
+ 0x964f,
+ 0x9710,
+ 0x9711,
+ 0x9712,
+ 0x9713,
+ 0x9714,
+ 0x9715,
+ 0x9802,
+ 0x9803,
+ 0x9804,
+ 0x9805,
+ 0x9806,
+ 0x9807,
+ 0x9808,
+ 0x9809,
+ 0x980A,
+ 0x9900,
+ 0x9901,
+ 0x9903,
+ 0x9904,
+ 0x9905,
+ 0x9906,
+ 0x9907,
+ 0x9908,
+ 0x9909,
+ 0x990A,
+ 0x990B,
+ 0x990C,
+ 0x990D,
+ 0x990E,
+ 0x990F,
+ 0x9910,
+ 0x9913,
+ 0x9917,
+ 0x9918,
+ 0x9919,
+ 0x9990,
+ 0x9991,
+ 0x9992,
+ 0x9993,
+ 0x9994,
+ 0x9995,
+ 0x9996,
+ 0x9997,
+ 0x9998,
+ 0x9999,
+ 0x999A,
+ 0x999B,
+ 0x999C,
+ 0x999D,
+ 0x99A0,
+ 0x99A2,
+ 0x99A4,
+ /* radeon secondary ids */
+ 0x3171,
+ 0x3e70,
+ 0x4164,
+ 0x4165,
+ 0x4166,
+ 0x4168,
+ 0x4170,
+ 0x4171,
+ 0x4172,
+ 0x4173,
+ 0x496e,
+ 0x4a69,
+ 0x4a6a,
+ 0x4a6b,
+ 0x4a70,
+ 0x4a74,
+ 0x4b69,
+ 0x4b6b,
+ 0x4b6c,
+ 0x4c6e,
+ 0x4e64,
+ 0x4e65,
+ 0x4e66,
+ 0x4e67,
+ 0x4e68,
+ 0x4e69,
+ 0x4e6a,
+ 0x4e71,
+ 0x4f73,
+ 0x5569,
+ 0x556b,
+ 0x556d,
+ 0x556f,
+ 0x5571,
+ 0x5854,
+ 0x5874,
+ 0x5940,
+ 0x5941,
+ 0x5b70,
+ 0x5b72,
+ 0x5b73,
+ 0x5b74,
+ 0x5b75,
+ 0x5d44,
+ 0x5d45,
+ 0x5d6d,
+ 0x5d6f,
+ 0x5d72,
+ 0x5d77,
+ 0x5e6b,
+ 0x5e6d,
+ 0x7120,
+ 0x7124,
+ 0x7129,
+ 0x712e,
+ 0x712f,
+ 0x7162,
+ 0x7163,
+ 0x7166,
+ 0x7167,
+ 0x7172,
+ 0x7173,
+ 0x71a0,
+ 0x71a1,
+ 0x71a3,
+ 0x71a7,
+ 0x71bb,
+ 0x71e0,
+ 0x71e1,
+ 0x71e2,
+ 0x71e6,
+ 0x71e7,
+ 0x71f2,
+ 0x7269,
+ 0x726b,
+ 0x726e,
+ 0x72a0,
+ 0x72a8,
+ 0x72b1,
+ 0x72b3,
+ 0x793f,
+};
+
+static const struct pci_device_id pciidlist[] = {
+ {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x678A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6790, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6791, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6792, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6798, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6799, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x679A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x679B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x679E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x679F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
+ {0x1002, 0x6800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+ {0x1002, 0x6801, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+ {0x1002, 0x6802, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN|AMD_IS_MOBILITY},
+ {0x1002, 0x6806, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6810, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6811, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6816, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6817, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6818, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6819, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PITCAIRN},
+ {0x1002, 0x6600, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6601, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6602, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6603, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6604, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6605, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6606, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6607, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6608, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6617, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6620, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6621, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6623, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|AMD_IS_MOBILITY},
+ {0x1002, 0x6631, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND},
+ {0x1002, 0x6820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6821, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6822, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6823, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6824, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6826, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x682A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x682C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|AMD_IS_MOBILITY},
+ {0x1002, 0x6835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x683B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x683D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x683F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE},
+ {0x1002, 0x6660, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x6663, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x6664, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ {0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
+ /* Kaveri */
+ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1306, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x1307, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1309, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x130F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1311, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1313, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1315, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1316, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x1317, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x1318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x131B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x131C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ {0x1002, 0x131D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
+ /* Bonaire */
+ {0x1002, 0x6640, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
+ {0x1002, 0x6641, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
+ {0x1002, 0x6646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
+ {0x1002, 0x6647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|AMD_IS_MOBILITY},
+ {0x1002, 0x6649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x6650, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x6651, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x6658, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x665c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x665d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ {0x1002, 0x665f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE},
+ /* Hawaii */
+ {0x1002, 0x67A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67AA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67B0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67B8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67B9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67BA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ {0x1002, 0x67BE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAWAII},
+ /* Kabini */
+ {0x1002, 0x9830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x9832, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9833, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x9834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x9836, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x9838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x983a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x983b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x983c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x983d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x983e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ {0x1002, 0x983f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KABINI|AMD_IS_APU},
+ /* mullins */
+ {0x1002, 0x9850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9851, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9852, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9853, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9854, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9855, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9856, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9857, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9858, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x9859, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
+ /* topaz */
+ {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
+ /* tonga */
+ {0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6929, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x692B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x692F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6930, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6938, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ {0x1002, 0x6939, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA},
+ /* fiji */
+ {0x1002, 0x7300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_FIJI},
+ {0x1002, 0x730F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_FIJI},
+ /* carrizo */
+ {0x1002, 0x9870, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ {0x1002, 0x9874, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ {0x1002, 0x9875, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ {0x1002, 0x9876, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ {0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU},
+ /* stoney */
+ {0x1002, 0x98E4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_STONEY|AMD_IS_APU},
+ /* Polaris11 */
+ {0x1002, 0x67E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ {0x1002, 0x67E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11},
+ /* Polaris10 */
+ {0x1002, 0x67C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67D0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x67CF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x6FDF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ /* Polaris12 */
+ {0x1002, 0x6980, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+ /* VEGAM */
+ {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+ {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+ {0x1002, 0x694F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+ /* Vega 10 */
+ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x6869, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x686f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ /* Vega 12 */
+ {0x1002, 0x69A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ /* Vega 20 */
+ {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ /* Raven */
+ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+ {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+ /* Arcturus */
+ {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+ {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+ {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+ {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},
+ /* Navi10 */
+ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ /* Navi14 */
+ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+
+ /* Renoir */
+ {0x1002, 0x15E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+ {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+ {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
+
+ /* Navi12 */
+ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
+ {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
+
+ /* Sienna_Cichlid */
+ {0x1002, 0x73A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+
+ /* Yellow Carp */
+ {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+ {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+
+ /* Navy_Flounder */
+ {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+ {0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+
+ /* DIMGREY_CAVEFISH */
+ {0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73ED, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+
+ /* Aldebaran */
+ {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+ {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN},
+
+ /* CYAN_SKILLFISH */
+ {0x1002, 0x13DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13F9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+ {0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU},
+
+ /* BEIGE_GOBY */
+ {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+ {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY},
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_DISPLAY_VGA << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_DISPLAY_OTHER << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
+ { PCI_DEVICE(0x1002, PCI_ANY_ID),
+ .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8,
+ .class_mask = 0xffffff,
+ .driver_data = CHIP_IP_DISCOVERY },
+
+ {0, 0, 0}
+};
+
+MODULE_DEVICE_TABLE(pci, pciidlist);
+
+static const struct amdgpu_asic_type_quirk asic_type_quirks[] = {
+ /* differentiate between P10 and P11 asics with the same DID */
+ {0x67FF, 0xE3, CHIP_POLARIS10},
+ {0x67FF, 0xE7, CHIP_POLARIS10},
+ {0x67FF, 0xF3, CHIP_POLARIS10},
+ {0x67FF, 0xF7, CHIP_POLARIS10},
+};
+
+static const struct drm_driver amdgpu_kms_driver;
+
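+/*
+ * Cycle a runtime PM get/put on the other PCI functions of the GPU (audio,
+ * USB, UCSI) so they pass through D0; see the comment at the call site in
+ * amdgpu_pci_probe() for why PMFW needs this.
+ */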
+static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
+{
+ struct pci_dev *p = NULL;
+ int i;
+
+ /* 0 - GPU
+ * 1 - audio
+ * 2 - USB
+ * 3 - UCSI
+ */
+ for (i = 1; i < 4; i++) {
+ p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+ adev->pdev->bus->number, i);
+ if (p) {
+ pm_runtime_get_sync(&p->dev);
+ pm_runtime_put_autosuspend(&p->dev);
+ pci_dev_put(p);
+ }
+ }
+}
+
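+/* Translate the amdgpu_debug_mask module parameter bits into per-device debug flags. */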
+static void amdgpu_init_debug_options(struct amdgpu_device *adev)
+{
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM) {
+ pr_info("debug: VM handling debug enabled\n");
+ adev->debug_vm = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_LARGEBAR) {
+ pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
+ adev->debug_largebar = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
+ pr_info("debug: soft reset for GPU recovery disabled\n");
+ adev->debug_disable_soft_recovery = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) {
+ pr_info("debug: place fw in vram for frontdoor loading\n");
+ adev->debug_use_vram_fw_buf = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_RAS_ACA) {
+ pr_info("debug: enable RAS ACA\n");
+ adev->debug_enable_ras_aca = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_EXP_RESETS) {
+ pr_info("debug: enable experimental reset features\n");
+ adev->debug_exp_resets = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) {
+ pr_info("debug: ring reset disabled\n");
+ adev->debug_disable_gpu_ring_reset = true;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
+ pr_info("debug: use vram for smu pool\n");
+ adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) {
+ pr_info("debug: VM mode debug for userptr is enabled\n");
+ adev->debug_vm_userptr = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) {
+ pr_info("debug: disable kernel logs of correctable errors\n");
+ adev->debug_disable_ce_logs = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
+ pr_info("debug: allowing command submission to CE engine\n");
+ adev->debug_enable_ce_cs = true;
+ }
+}
+
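+/*
+ * Some device IDs are shared between ASIC families; use the quirk table above
+ * to rewrite the ASIC type in @flags based on the PCI revision ID.
+ */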
+static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) {
+ if (pdev->device == asic_type_quirks[i].device &&
+ pdev->revision == asic_type_quirks[i].revision) {
+ flags &= ~AMD_ASIC_MASK;
+ flags |= asic_type_quirks[i].type;
+ break;
+ }
+ }
+
+ return flags;
+}
+
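+/*
+ * Decide whether amdgpu should bind to a SI or CIK generation part, based on
+ * the si_support/cik_support module parameters and on whether the radeon
+ * driver is built; all newer ASICs are handled by amdgpu unconditionally.
+ */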
+static bool amdgpu_support_enabled(struct device *dev,
+ const enum amd_asic_type family)
+{
+ const char *gen;
+ const char *param;
+ int module_param = -1;
+ bool radeon_support_built = IS_ENABLED(CONFIG_DRM_RADEON);
+ bool amdgpu_support_built = false;
+ bool support_by_default = false;
+
+ switch (family) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ gen = "SI";
+ param = "si_support";
+ module_param = amdgpu_si_support;
+ amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_SI);
+ support_by_default = true;
+ break;
+
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ support_by_default = true;
+ fallthrough;
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ gen = "CIK";
+ param = "cik_support";
+ module_param = amdgpu_cik_support;
+ amdgpu_support_built = IS_ENABLED(CONFIG_DRM_AMDGPU_CIK);
+ break;
+
+ default:
+ /* All other chips are supported by amdgpu only */
+ return true;
+ }
+
+ if (!amdgpu_support_built) {
+ dev_info(dev, "amdgpu built without %s support\n", gen);
+ return false;
+ }
+
+ if ((module_param == -1 && (support_by_default || !radeon_support_built)) ||
+ module_param == 1) {
+ if (radeon_support_built)
+ dev_info(dev, "%s support provided by amdgpu.\n"
+ "Use radeon.%s=1 amdgpu.%s=0 to override.\n",
+ gen, param, param);
+
+ return true;
+ }
+
+ if (radeon_support_built)
+ dev_info(dev, "%s support provided by radeon.\n"
+ "Use radeon.%s=0 amdgpu.%s=1 to override.\n",
+ gen, param, param);
+ else if (module_param == 0)
+ dev_info(dev, "%s support disabled by module param\n", gen);
+
+ return false;
+}
+
+static int amdgpu_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct drm_device *ddev;
+ struct amdgpu_device *adev;
+ unsigned long flags = ent->driver_data;
+ int ret, retry = 0, i;
+ bool supports_atomic = false;
+
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ if (drm_firmware_drivers_only() && amdgpu_modeset == -1)
+ return -EINVAL;
+ }
+
+ /* skip devices which are owned by radeon */
+ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
+ if (amdgpu_unsupported_pciidlist[i] == pdev->device)
+ return -ENODEV;
+ }
+
+ if (amdgpu_aspm == -1 && !pcie_aspm_enabled(pdev))
+ amdgpu_aspm = 0;
+
+ if (amdgpu_virtual_display ||
+ amdgpu_device_asic_has_dc_support(pdev, flags & AMD_ASIC_MASK))
+ supports_atomic = true;
+
+ if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
+ DRM_INFO("This hardware requires experimental hardware support.\n"
+ "See modparam exp_hw_support\n");
+ return -ENODEV;
+ }
+
+ flags = amdgpu_fix_asic_type(pdev, flags);
+
+ /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping;
+ * however, SME requires an indirect IOMMU mapping because the encryption
+ * bit is beyond the DMA mask of the chip.
+ */
+ if (cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
+ ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
+ dev_info(&pdev->dev,
+ "SME is not compatible with RAVEN\n");
+ return -ENOTSUPP;
+ }
+
+ if (!amdgpu_support_enabled(&pdev->dev, flags & AMD_ASIC_MASK))
+ return -ENODEV;
+
+ adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
+ if (IS_ERR(adev))
+ return PTR_ERR(adev);
+
+ adev->dev = &pdev->dev;
+ adev->pdev = pdev;
+ ddev = adev_to_drm(adev);
+
+ if (!supports_atomic)
+ ddev->driver_features &= ~DRIVER_ATOMIC;
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ pci_set_drvdata(pdev, ddev);
+
+ amdgpu_init_debug_options(adev);
+
+ ret = amdgpu_driver_load_kms(adev, flags);
+ if (ret)
+ goto err_pci;
+
+retry_init:
+ ret = drm_dev_register(ddev, flags);
+ if (ret == -EAGAIN && ++retry <= 3) {
+ DRM_INFO("retry init %d\n", retry);
+ /* Don't request EX mode too frequently, which the host may treat as an attack */
+ msleep(5000);
+ goto retry_init;
+ } else if (ret) {
+ goto err_pci;
+ }
+
+ ret = amdgpu_xcp_dev_register(adev, ent);
+ if (ret)
+ goto err_pci;
+
+ ret = amdgpu_amdkfd_drm_client_create(adev);
+ if (ret)
+ goto err_pci;
+
+ /*
+ * 1. don't init fbdev on hw without DCE
+ * 2. don't init fbdev if there are no connectors
+ */
+ if (adev->mode_info.mode_config_initialized &&
+ !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) {
+ const struct drm_format_info *format;
+
+ /* select 8 bpp console on low vram cards */
+ if (adev->gmc.real_vram_size <= (32*1024*1024))
+ format = drm_format_info(DRM_FORMAT_C8);
+ else
+ format = NULL;
+
+ drm_client_setup(adev_to_drm(adev), format);
+ }
+
+ ret = amdgpu_debugfs_init(adev);
+ if (ret)
+ DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
+
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
+ /* only need to skip on ATPX */
+ if (amdgpu_device_supports_px(adev))
+ dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+ /* we want direct complete for BOCO */
+ if (amdgpu_device_supports_boco(adev))
+ dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE |
+ DPM_FLAG_SMART_SUSPEND |
+ DPM_FLAG_MAY_SKIP_RESUME);
+ pm_runtime_use_autosuspend(ddev->dev);
+ pm_runtime_set_autosuspend_delay(ddev->dev, 5000);
+
+ pm_runtime_allow(ddev->dev);
+
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ pci_wake_from_d3(pdev, TRUE);
+
+ /*
+ * For runpm implemented via BACO, PMFW will handle the
+ * timing for BACO in and out:
+ * - put ASIC into BACO state only when both video and
+ * audio functions are in D3 state.
+ * - pull ASIC out of BACO state when either video or
+ * audio function is in D0 state.
+ * Also, at startup, PMFW assumes both functions are in
+ * D0 state.
+ *
+ * So if the snd driver was loaded before the amdgpu driver
+ * and the audio function was put into D3 state, there will
+ * be no PMFW-aware D-state transition (D0->D3) on runpm
+ * suspend, so BACO will not be kicked in correctly.
+ *
+ * Via amdgpu_get_secondary_funcs(), the audio dev is put
+ * into D0 state. Then there will be a PMFW-aware D-state
+ * transition (D0->D3) on runpm suspend.
+ */
+ if (amdgpu_device_supports_baco(adev) &&
+ !(adev->flags & AMD_IS_APU) &&
+ adev->asic_type >= CHIP_NAVI10)
+ amdgpu_get_secondary_funcs(adev);
+ }
+
+ return 0;
+
+err_pci:
+ pci_disable_device(pdev);
+ return ret;
+}
+
+static void
+amdgpu_pci_remove(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ amdgpu_ras_eeprom_check_and_recover(adev);
+ amdgpu_xcp_dev_unplug(adev);
+ amdgpu_gmc_prepare_nps_mode_change(adev);
+ drm_dev_unplug(dev);
+
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
+ pm_runtime_get_sync(dev->dev);
+ pm_runtime_forbid(dev->dev);
+ }
+
+ amdgpu_driver_unload_kms(dev);
+
+ /*
+ * Flush any in-flight DMA operations from the device.
+ * Clear the Bus Master Enable bit and then wait on the PCIe Device
+ * Status register's Transactions Pending bit.
+ */
+ pci_disable_device(pdev);
+ pci_wait_for_pending_transaction(pdev);
+}
+
+static void
+amdgpu_pci_shutdown(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (amdgpu_ras_intr_triggered())
+ return;
+
+ /* the device may not be resumed here; return immediately in that case */
+ if (adev->in_s4 && adev->in_suspend)
+ return;
+
+ /* if we are running in a VM, make sure the device is
+ * torn down properly on reboot/shutdown.
+ * Unfortunately we can't detect certain
+ * hypervisors, so just do this all the time.
+ */
+ if (!amdgpu_passthrough(adev))
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
+ amdgpu_device_prepare(dev);
+ amdgpu_device_suspend(dev, true);
+ adev->mp1_state = PP_MP1_STATE_NONE;
+}
+
+static int amdgpu_pmops_prepare(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* the device may not be resumed here; return immediately in that case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
+
+ /* Return a positive number here so
+ * DPM_FLAG_SMART_SUSPEND works properly
+ */
+ if (amdgpu_device_supports_boco(adev) && pm_runtime_suspended(dev))
+ return 1;
+
+ /* if neither S3 nor s2idle will be used for the device,
+ * then skip suspend
+ */
+ if (!amdgpu_acpi_is_s0ix_active(adev) &&
+ !amdgpu_acpi_is_s3_active(adev))
+ return 1;
+
+ return amdgpu_device_prepare(drm_dev);
+}
+
+static void amdgpu_pmops_complete(struct device *dev)
+{
+ amdgpu_device_complete(dev_get_drvdata(dev));
+}
+
+static int amdgpu_pmops_suspend(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = true;
+ else if (amdgpu_acpi_is_s3_active(adev))
+ adev->in_s3 = true;
+ if (!adev->in_s0ix && !adev->in_s3) {
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /* don't allow a deep suspend state the first time followed by s2idle the next time */
+ if (adev->last_suspend_state != PM_SUSPEND_ON &&
+ adev->last_suspend_state != pm_suspend_target_state) {
+ drm_err_once(drm_dev, "Unsupported suspend state %d\n",
+ pm_suspend_target_state);
+ return -EINVAL;
+ }
+#endif
+ return 0;
+ }
+
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /* cache the state last used for suspend */
+ adev->last_suspend_state = pm_suspend_target_state;
+#endif
+
+ return amdgpu_device_suspend(drm_dev, true);
+}
+
+static int amdgpu_pmops_suspend_noirq(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
+
+ if (amdgpu_acpi_should_gpu_reset(adev)) {
+ amdgpu_device_lock_reset_domain(adev->reset_domain);
+ r = amdgpu_asic_reset(adev);
+ amdgpu_device_unlock_reset_domain(adev->reset_domain);
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_pmops_resume(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
+
+ if (!adev->in_s0ix && !adev->in_s3)
+ return 0;
+
+ /* Avoids registers access if device is physically gone */
+ if (!pci_device_is_present(adev->pdev))
+ adev->no_hw_access = true;
+
+ r = amdgpu_device_resume(drm_dev, true);
+ if (amdgpu_acpi_is_s0ix_active(adev))
+ adev->in_s0ix = false;
+ else
+ adev->in_s3 = false;
+ return r;
+}
+
+static int amdgpu_pmops_freeze(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
+
+ r = amdgpu_device_suspend(drm_dev, true);
+ if (r)
+ return r;
+
+ if (amdgpu_acpi_should_gpu_reset(adev))
+ return amdgpu_asic_reset(adev);
+ return 0;
+}
+
+static int amdgpu_pmops_thaw(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+ /* do not resume the device if this is a normal hibernation */
+ if (!pm_hibernate_is_recovering() && !pm_hibernation_mode_is_suspend())
+ return 0;
+
+ return amdgpu_device_resume(drm_dev, true);
+}
+
+static int amdgpu_pmops_poweroff(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* the device may not be resumed here; return immediately in that case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
+
+ return amdgpu_device_suspend(drm_dev, true);
+}
+
+static int amdgpu_pmops_restore(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+ return amdgpu_device_resume(drm_dev, true);
+}
+
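+/*
+ * Report -EBUSY if any display output would be disturbed by runtime suspend:
+ * connected connectors, active CRTCs (with DC) or connectors with DPMS on
+ * (without DC).
+ */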
+static int amdgpu_runtime_idle_check_display(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (adev->mode_info.num_crtc) {
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ int ret = 0;
+
+ if (amdgpu_runtime_pm != -2) {
+ /* XXX: Return busy if any displays are connected to avoid
+ * possible display wakeups after runtime resume due to
+ * hotplug events in case any displays were connected while
+ * the GPU was in suspend. Remove this once that is fixed.
+ */
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->status == connector_status_connected) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+
+ if (ret)
+ return ret;
+ }
+
+ if (adev->dc_enabled) {
+ struct drm_crtc *crtc;
+
+ drm_for_each_crtc(crtc, drm_dev) {
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state->active)
+ ret = -EBUSY;
+ drm_modeset_unlock(&crtc->mutex);
+ if (ret < 0)
+ break;
+ }
+ } else {
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
+
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->dpms == DRM_MODE_DPMS_ON) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+ }
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
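+/* Report -EBUSY while any user queue doorbells are still in use. */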
+static int amdgpu_runtime_idle_check_userq(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ return xa_empty(&adev->userq_doorbell_xa) ? 0 : -EBUSY;
+}
+
+static int amdgpu_pmops_runtime_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int ret, i;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
+ pm_runtime_forbid(dev);
+ return -EBUSY;
+ }
+
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ return ret;
+ ret = amdgpu_runtime_idle_check_userq(dev);
+ if (ret)
+ return ret;
+
+ /* wait for all rings to drain before suspending */
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (ring && ring->sched.ready) {
+ ret = amdgpu_fence_wait_empty(ring);
+ if (ret)
+ return -EBUSY;
+ }
+ }
+
+ adev->in_runpm = true;
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+ /*
+ * By setting mp1_state to PP_MP1_STATE_UNLOAD, MP1 will do the
+ * proper cleanups and put itself into a state ready for PNP. That
+ * can address some random resume failures observed on BOCO-capable
+ * platforms.
+ * TODO: this may also be needed for PX-capable platforms.
+ */
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_UNLOAD;
+
+ ret = amdgpu_device_prepare(drm_dev);
+ if (ret)
+ return ret;
+ ret = amdgpu_device_suspend(drm_dev, false);
+ if (ret) {
+ adev->in_runpm = false;
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_NONE;
+ return ret;
+ }
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO)
+ adev->mp1_state = PP_MP1_STATE_NONE;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
+ /* Only need to handle PCI state in the driver for ATPX;
+ * the PCI core handles it for _PR3.
+ */
+ amdgpu_device_cache_pci_state(pdev);
+ pci_disable_device(pdev);
+ pci_ignore_hotplug(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+ } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
+ /* nothing to do */
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
+ amdgpu_device_baco_enter(adev);
+ }
+
+ dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
+
+ return 0;
+}
+
+static int amdgpu_pmops_runtime_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int ret;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
+ return -EINVAL;
+
+ /* Avoids registers access if device is physically gone */
+ if (!pci_device_is_present(adev->pdev))
+ adev->no_hw_access = true;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) {
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+ /* Only need to handle PCI state in the driver for ATPX;
+ * the PCI core handles it for _PR3.
+ */
+ pci_set_power_state(pdev, PCI_D0);
+ amdgpu_device_load_pci_state(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+ pci_set_master(pdev);
+ } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
+ /* Only need to handle PCI state in the driver for ATPX;
+ * the PCI core handles it for _PR3.
+ */
+ pci_set_master(pdev);
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
+ amdgpu_device_baco_exit(adev);
+ }
+ ret = amdgpu_device_resume(drm_dev, false);
+ if (ret) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
+ pci_disable_device(pdev);
+ return ret;
+ }
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX)
+ drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
+ adev->in_runpm = false;
+ return 0;
+}
+
+static int amdgpu_pmops_runtime_idle(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int ret;
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
+ pm_runtime_forbid(dev);
+ return -EBUSY;
+ }
+
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ goto done;
+
+ ret = amdgpu_runtime_idle_check_userq(dev);
+done:
+ pm_runtime_autosuspend(dev);
+ return ret;
+}
+
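+/*
+ * fd release: tear down the eviction fence manager and user queue manager
+ * for this file before handing off to drm_release().
+ */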
+static int amdgpu_drm_release(struct inode *inode, struct file *filp)
+{
+ struct drm_file *file_priv = filp->private_data;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct drm_device *dev = file_priv->minor->dev;
+ int idx;
+
+ if (fpriv && drm_dev_enter(dev, &idx)) {
+ fpriv->evf_mgr.fd_closing = true;
+ amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
+ amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
+ drm_dev_exit(idx);
+ }
+
+ return drm_release(inode, filp);
+}
+
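+/*
+ * Wrap drm_ioctl() with a runtime PM reference so the device is awake for
+ * the duration of the ioctl.
+ */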
+long amdgpu_drm_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct drm_file *file_priv = filp->private_data;
+ struct drm_device *dev;
+ long ret;
+
+ dev = file_priv->minor->dev;
+ ret = pm_runtime_get_sync(dev->dev);
+ if (ret < 0)
+ goto out;
+
+ ret = drm_ioctl(filp, cmd, arg);
+
+out:
+ pm_runtime_put_autosuspend(dev->dev);
+ return ret;
+}
+
+static const struct dev_pm_ops amdgpu_pm_ops = {
+ .prepare = pm_sleep_ptr(amdgpu_pmops_prepare),
+ .complete = pm_sleep_ptr(amdgpu_pmops_complete),
+ .suspend = pm_sleep_ptr(amdgpu_pmops_suspend),
+ .suspend_noirq = pm_sleep_ptr(amdgpu_pmops_suspend_noirq),
+ .resume = pm_sleep_ptr(amdgpu_pmops_resume),
+ .freeze = pm_sleep_ptr(amdgpu_pmops_freeze),
+ .thaw = pm_sleep_ptr(amdgpu_pmops_thaw),
+ .poweroff = pm_sleep_ptr(amdgpu_pmops_poweroff),
+ .restore = pm_sleep_ptr(amdgpu_pmops_restore),
+ .runtime_suspend = amdgpu_pmops_runtime_suspend,
+ .runtime_resume = amdgpu_pmops_runtime_resume,
+ .runtime_idle = amdgpu_pmops_runtime_idle,
+};
+
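+/*
+ * Flush callback for the DRM file: wait for the file's context entities to
+ * drain and for its VM to go idle before the fd is closed.
+ */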
+static int amdgpu_flush(struct file *f, fl_owner_t id)
+{
+ struct drm_file *file_priv = f->private_data;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ long timeout = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
+
+ timeout = amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr, timeout);
+ timeout = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
+
+ return timeout >= 0 ? 0 : timeout;
+}
+
+static const struct file_operations amdgpu_driver_kms_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .flush = amdgpu_flush,
+ .release = amdgpu_drm_release,
+ .unlocked_ioctl = amdgpu_drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = amdgpu_kms_compat_ioctl,
+#endif
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = drm_show_fdinfo,
+#endif
+ .fop_flags = FOP_UNSIGNED_OFFSET,
+};
+
+int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
+{
+ struct drm_file *file;
+
+ if (!filp)
+ return -EINVAL;
+
+ if (filp->f_op != &amdgpu_driver_kms_fops)
+ return -EINVAL;
+
+ file = filp->private_data;
+ *fpriv = file->driver_priv;
+ return 0;
+}
+
+const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ /* KMS */
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_FENCES, amdgpu_cs_wait_fences_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+};
+
+static const struct drm_driver amdgpu_kms_driver = {
+ .driver_features =
+ DRIVER_ATOMIC |
+ DRIVER_GEM |
+ DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
+ .open = amdgpu_driver_open_kms,
+ .postclose = amdgpu_driver_postclose_kms,
+ .ioctls = amdgpu_ioctls_kms,
+ .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
+ .dumb_create = amdgpu_mode_dumb_create,
+ .dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
+ .fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = amdgpu_show_fdinfo,
+#endif
+
+ .gem_prime_import = amdgpu_gem_prime_import,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .major = KMS_DRIVER_MAJOR,
+ .minor = KMS_DRIVER_MINOR,
+ .patchlevel = KMS_DRIVER_PATCHLEVEL,
+};
+
+const struct drm_driver amdgpu_partition_driver = {
+ .driver_features =
+ DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
+ .open = amdgpu_driver_open_kms,
+ .postclose = amdgpu_driver_postclose_kms,
+ .ioctls = amdgpu_ioctls_kms,
+ .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
+ .dumb_create = amdgpu_mode_dumb_create,
+ .dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
+ .fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
+
+ .gem_prime_import = amdgpu_gem_prime_import,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .major = KMS_DRIVER_MAJOR,
+ .minor = KMS_DRIVER_MINOR,
+ .patchlevel = KMS_DRIVER_PATCHLEVEL,
+};
+
+static struct pci_error_handlers amdgpu_pci_err_handler = {
+ .error_detected = amdgpu_pci_error_detected,
+ .mmio_enabled = amdgpu_pci_mmio_enabled,
+ .slot_reset = amdgpu_pci_slot_reset,
+ .resume = amdgpu_pci_resume,
+};
+
+static const struct attribute_group *amdgpu_sysfs_groups[] = {
+ &amdgpu_vram_mgr_attr_group,
+ &amdgpu_gtt_mgr_attr_group,
+ &amdgpu_flash_attr_group,
+ NULL,
+};
+
+static struct pci_driver amdgpu_kms_pci_driver = {
+ .name = DRIVER_NAME,
+ .id_table = pciidlist,
+ .probe = amdgpu_pci_probe,
+ .remove = amdgpu_pci_remove,
+ .shutdown = amdgpu_pci_shutdown,
+ .driver.pm = pm_ptr(&amdgpu_pm_ops),
+ .err_handler = &amdgpu_pci_err_handler,
+ .dev_groups = amdgpu_sysfs_groups,
+};
+
+static int __init amdgpu_init(void)
+{
+ int r;
+
+ r = amdgpu_sync_init();
+ if (r)
+ goto error_sync;
+
+ r = amdgpu_userq_fence_slab_init();
+ if (r)
+ goto error_fence;
+
+ DRM_INFO("amdgpu kernel modesetting enabled.\n");
+ amdgpu_register_atpx_handler();
+ amdgpu_acpi_detect();
+
+ /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
+ amdgpu_amdkfd_init();
+
+ if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) {
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ pr_crit("Overdrive is enabled, please disable it before "
+ "reporting any bugs unrelated to overdrive.\n");
+ }
+
+ /* let modprobe override vga console setting */
+ return pci_register_driver(&amdgpu_kms_pci_driver);
+
+error_fence:
+ amdgpu_sync_fini();
+
+error_sync:
+ return r;
+}
+
+static void __exit amdgpu_exit(void)
+{
+ amdgpu_amdkfd_fini();
+ pci_unregister_driver(&amdgpu_kms_pci_driver);
+ amdgpu_unregister_atpx_handler();
+ amdgpu_acpi_release();
+ amdgpu_sync_fini();
+ amdgpu_userq_fence_slab_fini();
+ mmu_notifier_synchronize();
+ amdgpu_xcp_drv_release();
+}
+
+module_init(amdgpu_init);
+module_exit(amdgpu_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
new file mode 100644
index 000000000000..2d86cc6f7f4d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
@@ -0,0 +1,52 @@
+/* amdgpu_drv.h -- Private header for amdgpu driver -*- linux-c -*-
+ *
+ * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DRV_H__
+#define __AMDGPU_DRV_H__
+
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+
+#include "amd_shared.h"
+
+/* General customization:
+ */
+
+#define DRIVER_AUTHOR "AMD linux driver team"
+
+#define DRIVER_NAME "amdgpu"
+#define DRIVER_DESC "AMD GPU"
+
+extern const struct drm_driver amdgpu_partition_driver;
+
+long amdgpu_drm_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg);
+
+long amdgpu_kms_compat_ioctl(struct file *filp,
+ unsigned int cmd, unsigned long arg);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
new file mode 100644
index 000000000000..8cd69836dd99
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_eeprom.h"
+#include "amdgpu.h"
+
+/* AT24CM02 and M24M02-R have a 256-byte write page size.
+ */
+#define EEPROM_PAGE_BITS 8
+#define EEPROM_PAGE_SIZE (1U << EEPROM_PAGE_BITS)
+#define EEPROM_PAGE_MASK (EEPROM_PAGE_SIZE - 1)
+
+#define EEPROM_OFFSET_SIZE 2
+
+/* EEPROM memory addresses are 19 bits long, which can
+ * be partitioned into 3, 8, 8 bits, for a total of 19.
+ * The upper 3 bits are sent as part of the 7-bit
+ * "Device Type Identifier"--an I2C concept, which for EEPROM devices
+ * is hard-coded as 1010b, indicating that it is an EEPROM
+ * device--this is the wire format, followed by the upper
+ * 3 bits of the 19-bit address, followed by the direction,
+ * followed by two bytes holding the rest of the 16-bits of
+ * the EEPROM memory address. The format on the wire for EEPROM
+ * devices is: 1010XYZD, A15:A8, A7:A0,
+ * where D is the direction bit and is sequenced out by the hardware.
+ * Bits XYZ are memory address bits 18, 17 and 16.
+ * These bits are compared to how pins 1-3 of the part are connected,
+ * depending on the size of the part, more on that later.
+ *
+ * Note that of this wire format, a client is in control
+ * of, and needs to specify only XYZ, A15:A8, A7:0, bits,
+ * which is exactly the EEPROM memory address, or offset,
+ * in order to address up to 8 EEPROM devices on the I2C bus.
+ *
+ * For instance, a 2-Mbit I2C EEPROM part addresses all its bytes
+ * using an 18-bit address, bits 17 to 0, and thus would use all but one of
+ * the 19 bits previously mentioned. The designer would then not connect
+ * pins 1 and 2, and pin 3 usually named "A_2" or "E2", would be connected to
+ * either Vcc or GND. This would allow for up to two 2-Mbit parts on
+ * the same bus, where one would be addressable with bit 18 as 1, and
+ * the other with bit 18 of the address as 0.
+ *
+ * For a 2-Mbit part, bit 18 is usually known as the "Chip Enable" or
+ * "Hardware Address Bit". This bit is compared to the load on pin 3
+ * of the device, described above, and if there is a match, then this
+ * device responds to the command. This way, you can connect two
+ * 2-Mbit EEPROM devices on the same bus, but see one contiguous
+ * memory space from 0 to 7FFFFh, where addresses 0 to 3FFFFh are in the
+ * device whose pin 3 is connected to GND, and addresses 40000h to 7FFFFh
+ * are in the 2nd device, whose pin 3 is connected to Vcc.
+ *
+ * This addressing you encode in the 32-bit "eeprom_addr" below,
+ * namely the 19-bits "XYZ,A15:A0", as a single 19-bit address. For
+ * instance, eeprom_addr = 0x6DA01, is 110_1101_1010_0000_0001, where
+ * XYZ=110b, and A15:A0=DA01h. The XYZ bits become part of the device
+ * address, and the rest of the address bits are sent as the memory
+ * address bytes.
+ *
+ * That is, for an I2C EEPROM driver everything is controlled by
+ * the "eeprom_addr".
+ *
+ * See also top of amdgpu_ras_eeprom.c.
+ *
+ * P.S. If you need to write, lock and read the Identification Page,
+ * (M24M02-DR device only, which we do not use), change the "7" to
+ * "0xF" in the macro below, and let the client set bit 20 to 1 in
+ * "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to
+ * 1 to lock it permanently.
+ */
+#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))
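+/* For example, eeprom_addr = 0x6DA01 yields the 7-bit device address
+ * 1010_110b (0x56); the two offset bytes sent next are 0xDA and 0x01.
+ */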
+
+static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
+ u8 *eeprom_buf, u32 buf_size, bool read)
+{
+ u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE];
+ struct i2c_msg msgs[] = {
+ {
+ .flags = 0,
+ .len = EEPROM_OFFSET_SIZE,
+ .buf = eeprom_offset_buf,
+ },
+ {
+ .flags = read ? I2C_M_RD : 0,
+ },
+ };
+ const u8 *p = eeprom_buf;
+ int r;
+ u16 len;
+
+ for (r = 0; buf_size > 0;
+ buf_size -= len, eeprom_addr += len, eeprom_buf += len) {
+ /* Set the EEPROM address we want to write to/read from.
+ */
+ msgs[0].addr = MAKE_I2C_ADDR(eeprom_addr);
+ msgs[1].addr = msgs[0].addr;
+ msgs[0].buf[0] = (eeprom_addr >> 8) & 0xff;
+ msgs[0].buf[1] = eeprom_addr & 0xff;
+
+ if (!read) {
+ /* Write the maximum amount of data, without
+ * crossing the device's page boundary, as per
+ * its spec. Partial page writes are allowed,
+ * starting at any location within the page,
+ * so long as the page boundary isn't crossed
+ * over (actually the page pointer rolls
+ * over).
+ *
+ * As per the AT24CM02 EEPROM spec, after
+ * writing into a page, the I2C driver should
+ * terminate the transfer, i.e. in
+ * "i2c_transfer()" below, with a STOP
+ * condition, so that the self-timed write
+ * cycle begins. This is implied for the
+ * "i2c_transfer()" abstraction.
+ */
+ len = min(EEPROM_PAGE_SIZE - (eeprom_addr & EEPROM_PAGE_MASK),
+ buf_size);
+ } else {
+ /* Reading from the EEPROM has no limitation
+ * on the number of bytes read from the EEPROM
+ * device--they are simply sequenced out.
+ * Keep in mind that i2c_msg.len is u16 type.
+ */
+ len = min(U16_MAX, buf_size);
+ }
+ msgs[1].len = len;
+ msgs[1].buf = eeprom_buf;
+
+ /* This constitutes a START-STOP transaction.
+ */
+ r = i2c_transfer(i2c_adap, msgs, ARRAY_SIZE(msgs));
+ if (r != ARRAY_SIZE(msgs))
+ break;
+
+ if (!read) {
+ /* According to the EEPROM specs, the length of the
+ * self-timed write cycle, tWR (tW), is 10 ms.
+ *
+ * TODO: Use polling on ACK, aka Acknowledge
+ * Polling, to minimize waiting for the
+ * internal write cycle to complete, as it is
+ * usually smaller than tWR (tW).
+ */
+ msleep(10);
+ }
+ }
+
+ return r < 0 ? r : eeprom_buf - p;
+}
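+
+/*
+ * Illustrative aside, not part of the driver: a standalone sketch of the
+ * page-boundary chunking performed for writes above. A 300-byte write
+ * starting at offset 0x1F0 is split into 16-, 256- and 28-byte pieces so
+ * that no single transfer crosses a 256-byte page. demo_chunks() is a
+ * made-up name for this illustration only.
+ */
+#if 0
+#include <stdio.h>
+#include <stdint.h>
+
+static void demo_chunks(uint32_t addr, uint32_t size)
+{
+        while (size) {
+                uint32_t len = 256 - (addr & 255);
+
+                if (len > size)
+                        len = size;
+                printf("write %u bytes at 0x%X\n", len, addr);
+                addr += len;
+                size -= len;
+        }
+}
+
+int main(void)
+{
+        demo_chunks(0x1F0, 300);        /* 16, then 256, then 28 bytes */
+        return 0;
+}
+#endif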
+
+/**
+ * amdgpu_eeprom_xfer -- Read/write from/to an I2C EEPROM device
+ * @i2c_adap: pointer to the I2C adapter to use
+ * @eeprom_addr: EEPROM address from which to read/write
+ * @eeprom_buf: pointer to data buffer to read into/write from
+ * @buf_size: the size of @eeprom_buf
+ * @read: True if reading from the EEPROM, false if writing
+ *
+ * Returns the number of bytes read/written; -errno on error.
+ */
+static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
+ u8 *eeprom_buf, u32 buf_size, bool read)
+{
+ const struct i2c_adapter_quirks *quirks = i2c_adap->quirks;
+ u16 limit;
+ u16 ps; /* Partial size */
+ int res = 0, r;
+
+ if (!quirks)
+ limit = 0;
+ else if (read)
+ limit = quirks->max_read_len;
+ else
+ limit = quirks->max_write_len;
+
+ if (limit == 0) {
+ return __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr,
+ eeprom_buf, buf_size, read);
+ } else if (limit <= EEPROM_OFFSET_SIZE) {
+ dev_err_ratelimited(&i2c_adap->dev,
+ "maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d",
+ eeprom_addr, buf_size,
+ str_read_write(read), EEPROM_OFFSET_SIZE);
+ return -EINVAL;
+ }
+
+ /* The "limit" includes all data bytes sent/received,
+ * which would include the EEPROM_OFFSET_SIZE bytes.
+ * Account for them here.
+ */
+ limit -= EEPROM_OFFSET_SIZE;
+ for ( ; buf_size > 0;
+ buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) {
+ ps = min(limit, buf_size);
+
+ r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr,
+ eeprom_buf, ps, read);
+ if (r < 0)
+ return r;
+ res += r;
+ }
+
+ return res;
+}
+
+int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u32 bytes)
+{
+ return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
+ true);
+}
+
+int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u32 bytes)
+{
+ return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
+ false);
+}
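+
+/*
+ * Usage sketch (illustration only, not part of this patch): a caller with
+ * an already-initialized I2C adapter reads a few bytes like this. The
+ * function name, the eeprom_addr value and the error policy are all
+ * assumptions made for the example; the real callers are the RAS table
+ * and FRU code.
+ */
+#if 0
+static int demo_eeprom_peek(struct i2c_adapter *i2c_adap)
+{
+        u8 buf[16];
+        int r;
+
+        /* 0x50000: device selected with XYZ = 101b, offset 0 */
+        r = amdgpu_eeprom_read(i2c_adap, 0x50000, buf, sizeof(buf));
+        if (r < 0)
+                return r;
+
+        /* r is the number of bytes actually read */
+        return r == (int)sizeof(buf) ? 0 : -EIO;
+}
+#endif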
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
new file mode 100644
index 000000000000..8083b8253ef4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_EEPROM_H
+#define _AMDGPU_EEPROM_H
+
+#include <linux/i2c.h>
+
+int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u32 bytes);
+
+int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
+ u32 eeprom_addr, u8 *eeprom_buf,
+ u32 bytes);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
new file mode 100644
index 000000000000..3aaeed2d3562
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "atom.h"
+#include "atombios_encoders.h"
+
+void
+amdgpu_link_encoder_connector(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector;
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ /* walk the list and link encoders to connectors */
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & amdgpu_connector->devices) {
+ drm_connector_attach_encoder(connector, encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_atombios_encoder_init_backlight(amdgpu_encoder, connector);
+ adev->mode_info.bl_encoder = amdgpu_encoder;
+ }
+ }
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+void amdgpu_encoder_set_active_device(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices;
+ DRM_DEBUG_KMS("setting active device to %08x from %08x %08x for encoder %d\n",
+ amdgpu_encoder->active_device, amdgpu_encoder->devices,
+ amdgpu_connector->devices, encoder->encoder_type);
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+struct drm_connector *
+amdgpu_get_connector_for_encoder(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector, *found = NULL;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ if (amdgpu_encoder->active_device & amdgpu_connector->devices) {
+ found = connector;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ return found;
+}
+
+struct drm_connector *
+amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector, *found = NULL;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ if (amdgpu_encoder->devices & amdgpu_connector->devices) {
+ found = connector;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ return found;
+}
+
+struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_encoder *other_encoder;
+ struct amdgpu_encoder *other_amdgpu_encoder;
+
+ if (amdgpu_encoder->is_ext_encoder)
+ return NULL;
+
+ list_for_each_entry(other_encoder, &dev->mode_config.encoder_list, head) {
+ if (other_encoder == encoder)
+ continue;
+ other_amdgpu_encoder = to_amdgpu_encoder(other_encoder);
+ if (other_amdgpu_encoder->is_ext_encoder &&
+ (amdgpu_encoder->devices & other_amdgpu_encoder->devices))
+ return other_encoder;
+ }
+ return NULL;
+}
+
+u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder)
+{
+ struct drm_encoder *other_encoder = amdgpu_get_external_encoder(encoder);
+
+ if (other_encoder) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(other_encoder);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ return amdgpu_encoder->encoder_id;
+ default:
+ return ENCODER_OBJECT_ID_NONE;
+ }
+ }
+ return ENCODER_OBJECT_ID_NONE;
+}
+
+void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode;
+ unsigned int hblank = native_mode->htotal - native_mode->hdisplay;
+ unsigned int vblank = native_mode->vtotal - native_mode->vdisplay;
+ unsigned int hover = native_mode->hsync_start - native_mode->hdisplay;
+ unsigned int vover = native_mode->vsync_start - native_mode->vdisplay;
+ unsigned int hsync_width = native_mode->hsync_end - native_mode->hsync_start;
+ unsigned int vsync_width = native_mode->vsync_end - native_mode->vsync_start;
+
+ adjusted_mode->clock = native_mode->clock;
+ adjusted_mode->flags = native_mode->flags;
+
+ adjusted_mode->hdisplay = native_mode->hdisplay;
+ adjusted_mode->vdisplay = native_mode->vdisplay;
+
+ adjusted_mode->htotal = native_mode->hdisplay + hblank;
+ adjusted_mode->hsync_start = native_mode->hdisplay + hover;
+ adjusted_mode->hsync_end = adjusted_mode->hsync_start + hsync_width;
+
+ adjusted_mode->vtotal = native_mode->vdisplay + vblank;
+ adjusted_mode->vsync_start = native_mode->vdisplay + vover;
+ adjusted_mode->vsync_end = adjusted_mode->vsync_start + vsync_width;
+
+ drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
+
+ adjusted_mode->crtc_hdisplay = native_mode->hdisplay;
+ adjusted_mode->crtc_vdisplay = native_mode->vdisplay;
+
+ adjusted_mode->crtc_htotal = adjusted_mode->crtc_hdisplay + hblank;
+ adjusted_mode->crtc_hsync_start = adjusted_mode->crtc_hdisplay + hover;
+ adjusted_mode->crtc_hsync_end = adjusted_mode->crtc_hsync_start + hsync_width;
+
+ adjusted_mode->crtc_vtotal = adjusted_mode->crtc_vdisplay + vblank;
+ adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + vover;
+ adjusted_mode->crtc_vsync_end = adjusted_mode->crtc_vsync_start + vsync_width;
+}
+
+bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
+ u32 pixel_clock)
+{
+ struct drm_connector *connector;
+ struct amdgpu_connector *amdgpu_connector;
+ struct amdgpu_connector_atom_dig *dig_connector;
+
+ connector = amdgpu_get_connector_for_encoder(encoder);
+ /* if we don't have an active device yet, just use one of
+ * the connectors tied to the encoder.
+ */
+ if (!connector)
+ connector = amdgpu_get_connector_for_encoder_init(encoder);
+ amdgpu_connector = to_amdgpu_connector(connector);
+
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_HDMIB:
+ if (amdgpu_connector->use_digital) {
+ /* HDMI 1.3 supports up to 340 MHz over single link */
+ if (connector->display_info.is_hdmi) {
+ if (pixel_clock > 340000)
+ return true;
+ else
+ return false;
+ } else {
+ if (pixel_clock > 165000)
+ return true;
+ else
+ return false;
+ }
+ } else
+ return false;
+ case DRM_MODE_CONNECTOR_DVID:
+ case DRM_MODE_CONNECTOR_HDMIA:
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ dig_connector = amdgpu_connector->con_priv;
+ if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP))
+ return false;
+ else {
+ /* HDMI 1.3 supports up to 340 MHz over single link */
+ if (connector->display_info.is_hdmi) {
+ if (pixel_clock > 340000)
+ return true;
+ else
+ return false;
+ } else {
+ if (pixel_clock > 165000)
+ return true;
+ else
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
new file mode 100644
index 000000000000..23d7d0b0d625
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/sched.h>
+#include <drm/drm_exec.h>
+#include "amdgpu.h"
+
+#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
+#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
+
+static const char *
+amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu_eviction_fence";
+}
+
+static const char *
+amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_eviction_fence *ef;
+
+ ef = container_of(f, struct amdgpu_eviction_fence, base);
+ return ef->timeline_name;
+}
+
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec)
+{
+ struct amdgpu_eviction_fence *old_ef, *new_ef;
+ struct drm_gem_object *obj;
+ unsigned long index;
+ int ret;
+
+ if (evf_mgr->ev_fence &&
+ !dma_fence_is_signaled(&evf_mgr->ev_fence->base))
+ return 0;
+ /*
+ * Steps to replace eviction fence:
+ * * lock all objects in exec (caller)
+ * * create a new eviction fence
+ * * update new eviction fence in evf_mgr
+ * * attach the new eviction fence to BOs
+ * * release the old fence
+ * * unlock the objects (caller)
+ */
+ new_ef = amdgpu_eviction_fence_create(evf_mgr);
+ if (!new_ef) {
+ DRM_ERROR("Failed to create new eviction fence\n");
+ return -ENOMEM;
+ }
+
+ /* Update the eviction fence now */
+ spin_lock(&evf_mgr->ev_fence_lock);
+ old_ef = evf_mgr->ev_fence;
+ evf_mgr->ev_fence = new_ef;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ /* Attach the new fence */
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ if (!bo)
+ continue;
+ ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
+ if (ret) {
+ DRM_ERROR("Failed to attch new eviction fence\n");
+ goto free_err;
+ }
+ }
+
+ /* Free old fence */
+ if (old_ef)
+ dma_fence_put(&old_ef->base);
+ return 0;
+
+free_err:
+ kfree(new_ef);
+ return ret;
+}
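+
+/*
+ * Caller-side sketch (illustration only): the steps listed above assume the
+ * caller locks the relevant GEM objects with drm_exec before invoking the
+ * replace helper, roughly as below. The function name and the single-object
+ * scope are made up for the example; real callers iterate their BO lists.
+ */
+#if 0
+static int demo_replace_ev_fence(struct amdgpu_fpriv *fpriv,
+                                 struct drm_gem_object *obj)
+{
+        struct drm_exec exec;
+        int r;
+
+        drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+        drm_exec_until_all_locked(&exec) {
+                r = drm_exec_lock_obj(&exec, obj);
+                drm_exec_retry_on_contention(&exec);
+                if (r)
+                        goto out;
+        }
+
+        r = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+out:
+        drm_exec_fini(&exec);
+        return r;
+}
+#endif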
+
+static void
+amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
+{
+ struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
+ struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_eviction_fence *ev_fence;
+
+ mutex_lock(&uq_mgr->userq_mutex);
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ if (ev_fence)
+ dma_fence_get(&ev_fence->base);
+ else
+ goto unlock;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ amdgpu_userq_evict(uq_mgr, ev_fence);
+
+ mutex_unlock(&uq_mgr->userq_mutex);
+ dma_fence_put(&ev_fence->base);
+ return;
+
+unlock:
+ spin_unlock(&evf_mgr->ev_fence_lock);
+ mutex_unlock(&uq_mgr->userq_mutex);
+}
+
+static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
+{
+ struct amdgpu_eviction_fence_mgr *evf_mgr;
+ struct amdgpu_eviction_fence *ev_fence;
+
+ if (!f)
+ return true;
+
+ ev_fence = to_ev_fence(f);
+ evf_mgr = ev_fence->evf_mgr;
+
+ schedule_delayed_work(&evf_mgr->suspend_work, 0);
+ return true;
+}
+
+static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
+ .get_driver_name = amdgpu_eviction_fence_get_driver_name,
+ .get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
+ .enable_signaling = amdgpu_eviction_fence_enable_signaling,
+};
+
+void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence)
+{
+ spin_lock(&evf_mgr->ev_fence_lock);
+ dma_fence_signal(&ev_fence->base);
+ spin_unlock(&evf_mgr->ev_fence_lock);
+}
+
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+ ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
+ if (!ev_fence)
+ return NULL;
+
+ ev_fence->evf_mgr = evf_mgr;
+ get_task_comm(ev_fence->timeline_name, current);
+ spin_lock_init(&ev_fence->lock);
+ dma_fence_init64(&ev_fence->base, &amdgpu_eviction_fence_ops,
+ &ev_fence->lock, evf_mgr->ev_fence_ctx,
+ atomic_inc_return(&evf_mgr->ev_fence_seq));
+ return ev_fence;
+}
+
+void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+ /* Wait for any pending work to execute */
+ flush_delayed_work(&evf_mgr->suspend_work);
+
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ if (!ev_fence)
+ return;
+
+ dma_fence_wait(&ev_fence->base, false);
+
+ /* Last unref of ev_fence */
+ dma_fence_put(&ev_fence->base);
+}
+
+int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+ struct dma_resv *resv = bo->tbo.base.resv;
+ int ret;
+
+ if (!resv)
+ return 0;
+
+ ret = dma_resv_reserve_fences(resv, 1);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Failed to reserve fence space\n");
+ return ret;
+ }
+
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ if (ev_fence)
+ dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ return 0;
+}
+
+void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
+{
+ struct dma_fence *stub = dma_fence_get_stub();
+
+ dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx,
+ stub, DMA_RESV_USAGE_BOOKKEEP);
+ dma_fence_put(stub);
+}
+
+int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ /* This needs to be done one time per open */
+ atomic_set(&evf_mgr->ev_fence_seq, 0);
+ evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
+ spin_lock_init(&evf_mgr->ev_fence_lock);
+
+ INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
+ return 0;
+}
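+
+/*
+ * Lifetime sketch (illustration only): the manager is embedded in the
+ * per-file amdgpu_fpriv, so a typical open/close path looks roughly like
+ * this. demo_open()/demo_close() are made-up names; fences are created and
+ * attached later, when buffers are set up and replaced on eviction.
+ */
+#if 0
+static int demo_open(struct amdgpu_fpriv *fpriv)
+{
+        /* once per open(): fence context, sequence number and worker */
+        return amdgpu_eviction_fence_init(&fpriv->evf_mgr);
+}
+
+static void demo_close(struct amdgpu_fpriv *fpriv)
+{
+        /* flush the suspend worker and drop the last fence reference */
+        fpriv->evf_mgr.fd_closing = 1;
+        amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
+}
+#endif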
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
new file mode 100644
index 000000000000..fcd867b7147d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_EV_FENCE_H_
+#define AMDGPU_EV_FENCE_H_
+
+struct amdgpu_eviction_fence {
+ struct dma_fence base;
+ spinlock_t lock;
+ char timeline_name[TASK_COMM_LEN];
+ struct amdgpu_eviction_fence_mgr *evf_mgr;
+};
+
+struct amdgpu_eviction_fence_mgr {
+ u64 ev_fence_ctx;
+ atomic_t ev_fence_seq;
+ spinlock_t ev_fence_lock;
+ struct amdgpu_eviction_fence *ev_fence;
+ struct delayed_work suspend_work;
+ uint8_t fd_closing;
+};
+
+/* Eviction fence helper functions */
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+int
+amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+void
+amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+int
+amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
new file mode 100644
index 000000000000..b349bb3676d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+/* Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: David Nieto
+ * Roy Sun
+ */
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_ctx.h"
+#include "amdgpu_fdinfo.h"
+
+
+static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = "gfx",
+ [AMDGPU_HW_IP_COMPUTE] = "compute",
+ [AMDGPU_HW_IP_DMA] = "dma",
+ [AMDGPU_HW_IP_UVD] = "dec",
+ [AMDGPU_HW_IP_VCE] = "enc",
+ [AMDGPU_HW_IP_UVD_ENC] = "enc_1",
+ [AMDGPU_HW_IP_VCN_DEC] = "dec",
+ [AMDGPU_HW_IP_VCN_ENC] = "enc",
+ [AMDGPU_HW_IP_VCN_JPEG] = "jpeg",
+ [AMDGPU_HW_IP_VPE] = "vpe",
+};
+
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+ struct amdgpu_fpriv *fpriv = file->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+ ktime_t usage[AMDGPU_HW_IP_NUM];
+ const char *pl_name[] = {
+ [TTM_PL_VRAM] = "vram",
+ [TTM_PL_TT] = "gtt",
+ [TTM_PL_SYSTEM] = "cpu",
+ [AMDGPU_PL_GDS] = "gds",
+ [AMDGPU_PL_GWS] = "gws",
+ [AMDGPU_PL_OA] = "oa",
+ [AMDGPU_PL_DOORBELL] = "doorbell",
+ [AMDGPU_PL_MMIO_REMAP] = "mmioremap",
+ };
+ unsigned int hw_ip, i;
+
+ amdgpu_vm_get_memory(vm, stats);
+ amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
+
+ /*
+ * ******************************************************************
+ * For text output format description please see drm-usage-stats.rst!
+ * ******************************************************************
+ */
+
+ drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
+
+ for (i = 0; i < ARRAY_SIZE(pl_name); i++) {
+ if (!pl_name[i])
+ continue;
+
+ drm_print_memory_stats(p,
+ &stats[i].drm,
+ DRM_GEM_OBJECT_RESIDENT |
+ DRM_GEM_OBJECT_PURGEABLE,
+ pl_name[i]);
+ }
+
+ /* Legacy amdgpu keys, alias to drm-resident-memory-: */
+ drm_printf(p, "drm-memory-vram:\t%llu KiB\n",
+ stats[TTM_PL_VRAM].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-gtt: \t%llu KiB\n",
+ stats[TTM_PL_TT].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-cpu: \t%llu KiB\n",
+ stats[TTM_PL_SYSTEM].drm.resident/1024UL);
+
+ /* Amdgpu specific memory accounting keys: */
+ drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
+ stats[TTM_PL_VRAM].evicted/1024UL);
+ drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
+ (stats[TTM_PL_VRAM].drm.shared +
+ stats[TTM_PL_VRAM].drm.private) / 1024UL);
+ drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
+ (stats[TTM_PL_TT].drm.shared +
+ stats[TTM_PL_TT].drm.private) / 1024UL);
+
+ for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
+ if (!usage[hw_ip])
+ continue;
+
+ drm_printf(p, "drm-engine-%s:\t%lld ns\n", amdgpu_ip_name[hw_ip],
+ ktime_to_ns(usage[hw_ip]));
+ }
+}
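+
+/*
+ * Example output (illustrative, values made up): reading
+ * /proc/<pid>/fdinfo/<fd> for a file handled by this driver yields lines
+ * roughly like the following, in addition to the common keys printed by
+ * the DRM core:
+ *
+ *   pasid:               32771
+ *   drm-memory-vram:     524288 KiB
+ *   drm-memory-gtt:      131072 KiB
+ *   drm-memory-cpu:      0 KiB
+ *   amd-evicted-vram:    0 KiB
+ *   amd-requested-vram:  524288 KiB
+ *   drm-engine-gfx:      1234567890 ns
+ */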
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
new file mode 100644
index 000000000000..0398f5a159ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: David Nieto
+ * Roy Sun
+ */
+#ifndef __AMDGPU_SMI_H__
+#define __AMDGPU_SMI_H__
+
+#include <linux/idr.h>
+#include <linux/kfifo.h>
+#include <linux/rbtree.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
+#include <linux/sched/mm.h>
+
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ids.h"
+
+uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id);
+void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
new file mode 100644
index 000000000000..c7843e336310
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -0,0 +1,976 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ * Dave Airlie
+ */
+#include <linux/seq_file.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/firmware.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+
+/*
+ * Cast helper
+ */
+static const struct dma_fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
+{
+ struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+ return __f;
+}
+
+/**
+ * amdgpu_fence_write - write a fence value
+ *
+ * @ring: ring the fence is associated with
+ * @seq: sequence number to write
+ *
+ * Writes a fence value to memory (all asics).
+ */
+static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+
+ if (drv->cpu_addr)
+ *drv->cpu_addr = cpu_to_le32(seq);
+}
+
+/**
+ * amdgpu_fence_read - read a fence value
+ *
+ * @ring: ring the fence is associated with
+ *
+ * Reads a fence value from memory (all asics).
+ * Returns the value of the fence read from memory.
+ */
+static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ u32 seq = 0;
+
+ if (drv->cpu_addr)
+ seq = le32_to_cpu(*drv->cpu_addr);
+ else
+ seq = atomic_read(&drv->last_seq);
+
+ return seq;
+}
+
+/**
+ * amdgpu_fence_emit - emit a fence on the requested ring
+ *
+ * @ring: ring the fence is associated with
+ * @af: amdgpu fence input
+ * @flags: flags to pass into the subordinate .emit_fence() call
+ *
+ * Emits a fence command on the requested ring (all asics).
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *fence;
+ struct dma_fence __rcu **ptr;
+ uint32_t seq;
+ int r;
+
+ fence = &af->base;
+ af->ring = ring;
+
+ seq = ++ring->fence_drv.sync_seq;
+ dma_fence_init(fence, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx, seq);
+
+ amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+ seq, flags | AMDGPU_FENCE_FLAG_INT);
+ amdgpu_fence_save_wptr(af);
+ pm_runtime_get_noresume(adev_to_drm(adev)->dev);
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ if (unlikely(rcu_dereference_protected(*ptr, 1))) {
+ struct dma_fence *old;
+
+ rcu_read_lock();
+ old = dma_fence_get_rcu_safe(ptr);
+ rcu_read_unlock();
+
+ if (old) {
+ r = dma_fence_wait(old, false);
+ dma_fence_put(old);
+ if (r)
+ return r;
+ }
+ }
+
+ to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
+ /* This function can't be called concurrently anyway, otherwise
+ * emitting the fence would mess up the hardware ring buffer.
+ */
+ rcu_assign_pointer(*ptr, dma_fence_get(fence));
+
+ return 0;
+}
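+
+/*
+ * Illustrative aside, not part of the driver: with num_hw_submission = 32
+ * the fences[] array above has 64 slots and num_fences_mask = 63, so
+ * sequence numbers map onto slots as shown below. A new fence only has to
+ * wait if the fence that used the same slot 64 submissions earlier has not
+ * signaled yet.
+ */
+#if 0
+#include <stdio.h>
+#include <stdint.h>
+
+int main(void)
+{
+        uint32_t num_fences_mask = 32 * 2 - 1;  /* 63 */
+        uint32_t seq;
+
+        /* seq 62 -> slot 62, seq 63 -> slot 63, seq 64 -> slot 0, ... */
+        for (seq = 62; seq < 67; seq++)
+                printf("seq %u -> slot %u\n", seq, seq & num_fences_mask);
+        return 0;
+}
+#endif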
+
+/**
+ * amdgpu_fence_emit_polling - emit a fence on the requested ring
+ *
+ * @ring: ring the fence is associated with
+ * @s: resulting sequence number
+ * @timeout: the timeout for waiting in usecs
+ *
+ * Emits a fence command on the requested ring (all asics).
+ * Used For polling fence.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout)
+{
+ uint32_t seq;
+ signed long r;
+
+ if (!s)
+ return -EINVAL;
+
+ seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ return -ETIMEDOUT;
+
+ amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+ seq, 0);
+
+ *s = seq;
+
+ return 0;
+}
+
+/**
+ * amdgpu_fence_schedule_fallback - schedule fallback check
+ *
+ * @ring: pointer to struct amdgpu_ring
+ *
+ * Start a timer as fallback to our interrupts.
+ */
+static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
+{
+ mod_timer(&ring->fence_drv.fallback_timer,
+ jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
+}
+
+/**
+ * amdgpu_fence_process - check for fence activity
+ *
+ * @ring: pointer to struct amdgpu_ring
+ *
+ * Checks the current fence value and calculates the last
+ * signalled fence value. Wakes the fence queue if the
+ * sequence number has increased.
+ *
+ * Returns true if fence was processed
+ */
+bool amdgpu_fence_process(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t seq, last_seq;
+
+ do {
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ seq = amdgpu_fence_read(ring);
+
+ } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
+
+ if (timer_delete(&ring->fence_drv.fallback_timer) &&
+ seq != ring->fence_drv.sync_seq)
+ amdgpu_fence_schedule_fallback(ring);
+
+ if (unlikely(seq == last_seq))
+ return false;
+
+ last_seq &= drv->num_fences_mask;
+ seq &= drv->num_fences_mask;
+
+ do {
+ struct dma_fence *fence, **ptr;
+ struct amdgpu_fence *am_fence;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ ptr = &drv->fences[last_seq];
+
+ /* There is always exactly one thread signaling this fence slot */
+ fence = rcu_dereference_protected(*ptr, 1);
+ RCU_INIT_POINTER(*ptr, NULL);
+
+ if (!fence)
+ continue;
+
+ /* Save the wptr in the fence driver so we know what the last processed
+ * wptr was. This is required for re-emitting the ring state for
+ * queues that are reset but are not guilty and thus have no guilty fence.
+ */
+ am_fence = container_of(fence, struct amdgpu_fence, base);
+ drv->signalled_wptr = am_fence->wptr;
+ dma_fence_signal(fence);
+ dma_fence_put(fence);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ } while (last_seq != seq);
+
+ return true;
+}
+
+/**
+ * amdgpu_fence_fallback - fallback for hardware interrupts
+ *
+ * @t: timer context used to obtain the pointer to ring structure
+ *
+ * Checks for fence activity.
+ */
+static void amdgpu_fence_fallback(struct timer_list *t)
+{
+ struct amdgpu_ring *ring = timer_container_of(ring, t,
+ fence_drv.fallback_timer);
+
+ if (amdgpu_fence_process(ring))
+ dev_warn(ring->adev->dev,
+ "Fence fallback timer expired on ring %s\n",
+ ring->name);
+}
+
+/**
+ * amdgpu_fence_wait_empty - wait for all fences to signal
+ *
+ * @ring: ring index the fence is associated with
+ *
+ * Wait for all fences on the requested ring to signal (all asics).
+ * Returns 0 if the fences have passed, error for all other cases.
+ */
+int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
+{
+ uint64_t seq = READ_ONCE(ring->fence_drv.sync_seq);
+ struct dma_fence *fence, **ptr;
+ int r;
+
+ if (!seq)
+ return 0;
+
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ rcu_read_lock();
+ fence = rcu_dereference(*ptr);
+ if (!fence || !dma_fence_get_rcu(fence)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ r = dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ return r;
+}
+
+/**
+ * amdgpu_fence_wait_polling - busy wait for given sequence number
+ *
+ * @ring: ring index the fence is associated with
+ * @wait_seq: sequence number to wait
+ * @timeout: the timeout for waiting in usecs
+ *
+ * Wait for all fences on the requested ring to signal (all asics).
+ * Returns left time if no timeout, 0 or minus if timeout.
+ */
+signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
+ uint32_t wait_seq,
+ signed long timeout)
+{
+ while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) {
+ udelay(2);
+ timeout -= 2;
+ }
+ return timeout > 0 ? timeout : 0;
+}
+
+/**
+ * amdgpu_fence_count_emitted - get the count of emitted fences
+ *
+ * @ring: ring the fence is associated with
+ *
+ * Get the number of fences emitted on the requested ring (all asics).
+ * Returns the number of emitted fences on the ring. Used by the
+ * dynpm code to track ring activity.
+ */
+unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
+{
+ uint64_t emitted;
+
+ /* We are not protected by ring lock when reading the last sequence
+ * but it's ok to report slightly wrong fence count here.
+ */
+ emitted = 0x100000000ull;
+ emitted -= atomic_read(&ring->fence_drv.last_seq);
+ emitted += READ_ONCE(ring->fence_drv.sync_seq);
+ return lower_32_bits(emitted);
+}
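+
+/*
+ * Illustrative aside: the 0x100000000ull bias above keeps the arithmetic
+ * correct when the 32-bit sequence number wraps. For example, with
+ * last_seq = 0xFFFFFFF0 and sync_seq = 0x00000010:
+ *
+ *   emitted = 0x100000000 - 0xFFFFFFF0 + 0x00000010 = 0x20
+ *
+ * so lower_32_bits(emitted) reports 32 fences outstanding.
+ */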
+
+/**
+ * amdgpu_fence_last_unsignaled_time_us - time since the earliest unsignaled fence was emitted
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence that is still unsignaled and return the time
+ * delta between when it was emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+ uint32_t last_seq, sync_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+ if (last_seq == sync_seq)
+ return 0;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ fence = drv->fences[last_seq];
+ if (!fence)
+ return 0;
+
+ return ktime_us_delta(ktime_get(),
+ to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * This function is called when the fence and its related ib are about to be
+ * resubmitted to the GPU in the MCBP (mid-command buffer preemption)
+ * scenario, so a race with amdgpu_fence_process() modifying the same fence
+ * does not need to be considered.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct dma_fence *fence;
+
+ seq &= drv->num_fences_mask;
+ fence = drv->fences[seq];
+ if (!fence)
+ return;
+
+ to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
+/**
+ * amdgpu_fence_driver_start_ring - make the fence driver
+ * ready for use on the requested ring.
+ *
+ * @ring: ring to start the fence driver on
+ * @irq_src: interrupt source to use for this ring
+ * @irq_type: interrupt type to use for this ring
+ *
+ * Make the fence driver ready for processing (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has.
+ * Returns 0 for success, errors for failure.
+ */
+int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint64_t index;
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) {
+ ring->fence_drv.cpu_addr = ring->fence_cpu_addr;
+ ring->fence_drv.gpu_addr = ring->fence_gpu_addr;
+ } else {
+ /* put fence directly behind firmware */
+ index = ALIGN(adev->uvd.fw->size, 8);
+ ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
+ ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
+ }
+ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
+
+ ring->fence_drv.irq_src = irq_src;
+ ring->fence_drv.irq_type = irq_type;
+ ring->fence_drv.initialized = true;
+
+ DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s uses gpu addr 0x%016llx\n",
+ ring->name, ring->fence_drv.gpu_addr);
+ return 0;
+}
+
+/**
+ * amdgpu_fence_driver_init_ring - init the fence driver
+ * for the requested ring.
+ *
+ * @ring: ring to init the fence driver on
+ *
+ * Init the fence driver for the requested ring (all asics).
+ * Helper function for amdgpu_fence_driver_init().
+ */
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (!adev)
+ return -EINVAL;
+
+ if (!is_power_of_2(ring->num_hw_submission))
+ return -EINVAL;
+
+ ring->fence_drv.cpu_addr = NULL;
+ ring->fence_drv.gpu_addr = 0;
+ ring->fence_drv.sync_seq = 0;
+ atomic_set(&ring->fence_drv.last_seq, 0);
+ ring->fence_drv.initialized = false;
+
+ timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
+
+ ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
+ spin_lock_init(&ring->fence_drv.lock);
+ ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
+ GFP_KERNEL);
+
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * amdgpu_fence_driver_sw_init - init the fence driver
+ * for all possible rings.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Init the fence driver for all possible rings (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has using
+ * amdgpu_fence_driver_start_ring().
+ * Returns 0 for success.
+ */
+int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/**
+ * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
+ * fence driver interrupts need to be restored.
+ *
+ * @ring: ring to be checked
+ *
+ * Interrupts for rings that belong to GFX IP don't need to be restored
+ * when the target power state is s0ix.
+ *
+ * Return true if need to restore interrupts, false otherwise.
+ */
+static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_gfx_power_domain = false;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_SDMA:
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
+ IP_VERSION(5, 0, 0))
+ is_gfx_power_domain = true;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ case AMDGPU_RING_TYPE_MES:
+ is_gfx_power_domain = true;
+ break;
+ default:
+ break;
+ }
+
+ return !(adev->in_s0ix && is_gfx_power_domain);
+}
+
+/**
+ * amdgpu_fence_driver_hw_fini - tear down the fence driver
+ * for all possible rings.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Tear down the fence driver for all possible rings (all asics).
+ */
+void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+ /* You can't wait for HW to signal if it's gone */
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)))
+ r = amdgpu_fence_wait_empty(ring);
+ else
+ r = -ENODEV;
+ /* no need to trigger GPU reset as we are unloading */
+ if (r)
+ amdgpu_fence_driver_force_completion(ring);
+
+ if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
+ ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
+ amdgpu_irq_put(adev, ring->fence_drv.irq_src,
+ ring->fence_drv.irq_type);
+
+ timer_delete_sync(&ring->fence_drv.fallback_timer);
+ }
+}
+
+/* Will either stop and flush handlers for the amdgpu interrupt or re-enable it */
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
+ continue;
+
+ if (stop)
+ disable_irq(adev->irq.irq);
+ else
+ enable_irq(adev->irq.irq);
+ }
+}
+
+void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
+{
+ unsigned int i, j;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+ /*
+ * Notice we check for sched.ops since there's some
+ * override on the meaning of sched.ready by amdgpu.
+ * The natural check would be sched.ready, which is
+ * set as drm_sched_init() finishes...
+ */
+ if (ring->sched.ops)
+ drm_sched_fini(&ring->sched);
+
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+ dma_fence_put(ring->fence_drv.fences[j]);
+ kfree(ring->fence_drv.fences);
+ ring->fence_drv.fences = NULL;
+ ring->fence_drv.initialized = false;
+ }
+}
+
+/**
+ * amdgpu_fence_driver_hw_init - enable the fence driver
+ * for all possible rings.
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Enable the fence driver for all possible rings (all asics).
+ * Not all asics have all rings, so each asic will only
+ * start the fence driver on the rings it has using
+ * amdgpu_fence_driver_start_ring().
+ */
+void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+ /* enable the interrupt */
+ if (ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
+ amdgpu_irq_get(adev, ring->fence_drv.irq_src,
+ ring->fence_drv.irq_type);
+ }
+}
+
+/**
+ * amdgpu_fence_driver_set_error - set error code on fences
+ * @ring: the ring which contains the fences
+ * @error: the error code to set
+ *
+ * Set an error code to all the fences pending on the ring.
+ */
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&drv->lock, flags);
+ for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) {
+ struct dma_fence *fence;
+
+ fence = rcu_dereference_protected(drv->fences[i],
+ lockdep_is_held(&drv->lock));
+ if (fence && !dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, error);
+ }
+ spin_unlock_irqrestore(&drv->lock, flags);
+}
+
+/**
+ * amdgpu_fence_driver_force_completion - force signal latest fence of ring
+ *
+ * @ring: ring whose latest fence to force-signal
+ *
+ */
+void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
+{
+ amdgpu_fence_driver_set_error(ring, -ECANCELED);
+ amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
+ amdgpu_fence_process(ring);
+}
+
+
+/*
+ * Kernel queue reset handling
+ *
+ * The driver can reset individual queues for most engines, but those queues
+ * may contain work from multiple contexts. Resetting the queue will
+ * lose all of that state. In order to minimize the collateral damage, the
+ * driver will save the ring contents which are not associated with the guilty
+ * context prior to resetting the queue. After resetting the queue the queue
+ * contents from the other contexts is re-emitted to the rings so that it can
+ * be processed by the engine. To handle this, we save the queue's write
+ * pointer (wptr) in the fences associated with each context. If we get a
+ * queue timeout, we can then use the wptrs from the fences to determine
+ * which data needs to be saved out of the queue's ring buffer.
+ */
+
+/**
+ * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
+ *
+ * @af: the guilty fence to force-signal
+ *
+ */
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ struct amdgpu_ring *ring = af->ring;
+ unsigned long flags;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+
+ /* mark all fences from the guilty context with an error */
+ spin_lock_irqsave(&ring->fence_drv.lock, flags);
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ if (fence == af)
+ dma_fence_set_error(&fence->base, -ETIME);
+ else if (fence->context == af->context)
+ dma_fence_set_error(&fence->base, -ECANCELED);
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+ spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
+ /* signal the guilty fence */
+ amdgpu_fence_write(ring, (u32)af->base.seqno);
+ amdgpu_fence_process(ring);
+}
+
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
+{
+ af->wptr = af->ring->wptr;
+}
+
+static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
+ u64 start_wptr, u32 end_wptr)
+{
+ unsigned int first_idx = start_wptr & ring->buf_mask;
+ unsigned int last_idx = end_wptr & ring->buf_mask;
+ unsigned int i;
+
+ /* Backup the contents of the ring buffer. */
+ for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+ ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
+}
+
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ struct dma_fence *unprocessed;
+ struct dma_fence __rcu **ptr;
+ struct amdgpu_fence *fence;
+ u64 wptr;
+ u32 seq, last_seq;
+
+ last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
+ seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
+ wptr = ring->fence_drv.signalled_wptr;
+ ring->ring_backup_entries_to_copy = 0;
+
+ do {
+ last_seq++;
+ last_seq &= ring->fence_drv.num_fences_mask;
+
+ ptr = &ring->fence_drv.fences[last_seq];
+ rcu_read_lock();
+ unprocessed = rcu_dereference(*ptr);
+
+ if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
+ fence = container_of(unprocessed, struct amdgpu_fence, base);
+
+ /* save everything if the ring is not guilty, otherwise
+ * just save the content from other contexts.
+ */
+ if (!guilty_fence || (fence->context != guilty_fence->context))
+ amdgpu_ring_backup_unprocessed_command(ring, wptr,
+ fence->wptr);
+ wptr = fence->wptr;
+ }
+ rcu_read_unlock();
+ } while (last_seq != seq);
+}
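+
+/*
+ * Usage sketch (illustration only): a per-queue reset handler would use the
+ * helpers above roughly as follows -- back up the innocent commands, reset
+ * the queue, signal the guilty fence, then re-emit the saved dwords.
+ * demo_queue_reset() and demo_engine_queue_reset() are made-up names; the
+ * real handlers live in the IP-specific code.
+ */
+#if 0
+static int demo_queue_reset(struct amdgpu_ring *ring,
+                            struct amdgpu_fence *guilty_fence)
+{
+        unsigned int i;
+        int r;
+
+        /* record what needs to be replayed, based on the saved wptrs */
+        amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);
+
+        r = demo_engine_queue_reset(ring);      /* hypothetical HW reset */
+        if (r)
+                return r;
+
+        /* complete the guilty fence with an error */
+        amdgpu_fence_driver_guilty_force_completion(guilty_fence);
+
+        /* re-emit everything that did not belong to the guilty context */
+        r = amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
+        if (r)
+                return r;
+        for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
+                amdgpu_ring_write(ring, ring->ring_backup[i]);
+        amdgpu_ring_commit(ring);
+
+        return 0;
+}
+#endif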
+
+/*
+ * Common fence implementation
+ */
+
+static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu";
+}
+
+static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
+{
+ return (const char *)to_amdgpu_fence(f)->ring->name;
+}
+
+/**
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @f: fence
+ *
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
+ */
+static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
+{
+ if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);
+
+ return true;
+}
+
+/**
+ * amdgpu_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
+ *
+ * Free up the fence memory after the RCU grace period.
+ */
+static void amdgpu_fence_free(struct rcu_head *rcu)
+{
+ struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
+
+ /* the dma_fence is embedded in an amdgpu_fence; free the container */
+ kfree(to_amdgpu_fence(f));
+}
+
+/**
+ * amdgpu_fence_release - callback that fence can be freed
+ *
+ * @f: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
+ */
+static void amdgpu_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_fence_free);
+}
+
+static const struct dma_fence_ops amdgpu_fence_ops = {
+ .get_driver_name = amdgpu_fence_get_driver_name,
+ .get_timeline_name = amdgpu_fence_get_timeline_name,
+ .enable_signaling = amdgpu_fence_enable_signaling,
+ .release = amdgpu_fence_release,
+};
+
+/*
+ * Fence debugfs
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized)
+ continue;
+
+ amdgpu_fence_process(ring);
+
+ seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
+ seq_printf(m, "Last signaled fence 0x%08x\n",
+ atomic_read(&ring->fence_drv.last_seq));
+ seq_printf(m, "Last emitted 0x%08x\n",
+ ring->fence_drv.sync_seq);
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
+ ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
+ seq_printf(m, "Last signaled trailing fence 0x%08x\n",
+ le32_to_cpu(*ring->trail_fence_cpu_addr));
+ seq_printf(m, "Last emitted 0x%08x\n",
+ ring->trail_seq);
+ }
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+ continue;
+
+ /* set in CP_VMID_PREEMPT and preemption occurred */
+ seq_printf(m, "Last preempted 0x%08x\n",
+ le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
+ /* set in CP_VMID_RESET and reset occurred */
+ seq_printf(m, "Last reset 0x%08x\n",
+ le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
+ /* Both preemption and reset occurred */
+ seq_printf(m, "Last both 0x%08x\n",
+ le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
+ }
+ return 0;
+}
+
+/*
+ * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover
+ *
+ * Manually trigger a gpu reset and wait for the recovery to complete.
+ */
+static int gpu_recover_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return 0;
+ }
+
+ if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
+ flush_work(&adev->reset_work);
+
+ *val = atomic_read(&adev->reset_domain->reset_res);
+
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
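The attribute wired up below with DEFINE_DEBUGFS_ATTRIBUTE exposes gpu_recover_get() as a read-only debugfs file, so reading amdgpu_gpu_recover triggers a reset and reports the result as a decimal value. A minimal userspace sketch of that read, assuming DRI minor 0 and a mounted debugfs (both assumptions for illustration):

#include <stdio.h>

int main(void)
{
	long long res;
	/* minor number 0 is an assumption; use the minor of the target GPU */
	FILE *f = fopen("/sys/kernel/debug/dri/0/amdgpu_gpu_recover", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%lld", &res) == 1)
		printf("GPU recovery result: %lld\n", res);
	fclose(f);
	return 0;
}
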
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
+ "%lld\n");
+
+static void amdgpu_debugfs_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ reset_work);
+
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USER;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
+#endif
+
+void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
+ &amdgpu_debugfs_fence_info_fops);
+
+ if (!amdgpu_sriov_vf(adev)) {
+
+ INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
+ debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
+ &amdgpu_debugfs_gpu_recover_fops);
+ }
+#endif
+}
+
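The ops table above follows the standard dma_fence pattern: driver/timeline name callbacks, an enable_signaling hook that only arms a fallback timer, and an RCU-deferred release. A minimal sketch of the same skeleton for a hypothetical driver (the myfence_* names are illustrative and not part of amdgpu; enable_signaling is left out since it is optional):

#include <linux/dma-fence.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct myfence {
	struct dma_fence base;
	/* driver-private state would live here */
};

static const char *myfence_driver_name(struct dma_fence *f)
{
	return "mydrv";
}

static const char *myfence_timeline_name(struct dma_fence *f)
{
	return "mydrv-ring0";
}

static void myfence_free(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);

	kfree(container_of(f, struct myfence, base));
}

static void myfence_release(struct dma_fence *f)
{
	/* defer freeing until after an RCU grace period, as amdgpu does */
	call_rcu(&f->rcu, myfence_free);
}

static const struct dma_fence_ops myfence_ops = {
	.get_driver_name = myfence_driver_name,
	.get_timeline_name = myfence_timeline_name,
	.release = myfence_release,
};
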
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
new file mode 100644
index 000000000000..b0082aa7f3c6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_i2c.h"
+#include "smu_v11_0_i2c.h"
+#include "atom.h"
+#include "amdgpu_fru_eeprom.h"
+#include "amdgpu_eeprom.h"
+
+#define FRU_EEPROM_MADDR_6 0x60000
+#define FRU_EEPROM_MADDR_8 0x80000
+#define FRU_EEPROM_MADDR_INV 0xFFFFF
+
+static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
+{
+ /* Only server cards have the FRU EEPROM
+ * TODO: See if we can figure this out dynamically instead of
+ * having to parse VBIOS versions.
+ */
+ struct atom_context *atom_ctx = adev->mode_info.atom_context;
+
+ /* I2C access is blocked on VFs.
+ * TODO: Need another way to get the info.
+ * Also, the FRU is not valid for APU devices.
+ */
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return false;
+
+ /* The default I2C EEPROM address of the FRU.
+ */
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_8;
+
+ /* VBIOS is of the format ###-DXXXYYYY-##. For SKU identification,
+ * we can use just the "DXXX" portion. If there were more models, we
+ * could convert the 3 characters to a hex integer and use a switch
+ * for ease/speed/readability. For now, two string comparisons are
+ * reasonable and not too expensive.
+ */
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2):
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ /* D161 and D163 are the VG20 server SKUs */
+ if (atom_ctx && (strnstr(atom_ctx->vbios_pn, "D161",
+ sizeof(atom_ctx->vbios_pn)) ||
+ strnstr(atom_ctx->vbios_pn, "D163",
+ sizeof(atom_ctx->vbios_pn)))) {
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ } else {
+ return false;
+ }
+ case CHIP_ARCTURUS:
+ default:
+ return false;
+ }
+ case IP_VERSION(11, 0, 7):
+ if (atom_ctx && strnstr(atom_ctx->vbios_pn, "D603",
+ sizeof(atom_ctx->vbios_pn))) {
+ if (strnstr(atom_ctx->vbios_pn, "D603GLXE",
+ sizeof(atom_ctx->vbios_pn))) {
+ return false;
+ }
+
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+
+ } else {
+ return false;
+ }
+ case IP_VERSION(13, 0, 2):
+ /* All Aldebaran SKUs have an FRU */
+ if (atom_ctx && !strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_6;
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_8;
+ return true;
+ case IP_VERSION(13, 0, 12):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_INV;
+ return true;
+ default:
+ return false;
+ }
+}
+
+int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_fru_info *fru_info;
+ unsigned char buf[8], *pia;
+ u32 addr, fru_addr;
+ int size, len;
+ u8 csum;
+
+ if (!is_fru_eeprom_supported(adev, &fru_addr))
+ return 0;
+
+ /* FRU data available, but no direct EEPROM access */
+ if (fru_addr == FRU_EEPROM_MADDR_INV)
+ return 0;
+
+ if (!adev->fru_info) {
+ adev->fru_info = kzalloc(sizeof(*adev->fru_info), GFP_KERNEL);
+ if (!adev->fru_info)
+ return -ENOMEM;
+ }
+
+ fru_info = adev->fru_info;
+ /* For Arcturus and later, the default value of serial_number is the
+ * unique_id, so convert it to a 16-digit hex string for convenience
+ * and backwards compatibility.
+ */
+ sprintf(fru_info->serial, "%llx", adev->unique_id);
+
+ /* If algo exists, the i2c_adapter has been initialized */
+ if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) {
+ dev_warn(adev->dev,
+ "Cannot access FRU, EEPROM accessor not initialized");
+ return -ENODEV;
+ }
+
+ /* Read the IPMI Common header */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf,
+ sizeof(buf));
+ if (len != 8) {
+ dev_err(adev->dev, "Couldn't read the IPMI Common Header: %d",
+ len);
+ return len < 0 ? len : -EIO;
+ }
+
+ if (buf[0] != 1) {
+ dev_err(adev->dev, "Bad IPMI Common Header version: 0x%02x",
+ buf[0]);
+ return -EIO;
+ }
+
+ for (csum = 0; len > 0; len--)
+ csum += buf[len - 1];
+ if (csum) {
+ dev_err(adev->dev, "Bad IPMI Common Header checksum: 0x%02x",
+ csum);
+ return -EIO;
+ }
+
+ /* Get the offset to the Product Info Area (PIA). */
+ addr = buf[4] * 8;
+ if (!addr)
+ return 0;
+
+ /* Get the absolute address to the PIA. */
+ addr += fru_addr;
+
+ /* Read the header of the PIA. */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3);
+ if (len != 3) {
+ dev_err(adev->dev,
+ "Couldn't read the Product Info Area header: %d", len);
+ return len < 0 ? len : -EIO;
+ }
+
+ if (buf[0] != 1) {
+ dev_err(adev->dev, "Bad IPMI Product Info Area version: 0x%02x",
+ buf[0]);
+ return -EIO;
+ }
+
+ size = buf[1] * 8;
+ pia = kzalloc(size, GFP_KERNEL);
+ if (!pia)
+ return -ENOMEM;
+
+ /* Read the whole PIA. */
+ len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size);
+ if (len != size) {
+ kfree(pia);
+ dev_err(adev->dev, "Couldn't read the Product Info Area: %d",
+ len);
+ return len < 0 ? len : -EIO;
+ }
+
+ for (csum = 0; size > 0; size--)
+ csum += pia[size - 1];
+ if (csum) {
+ dev_err(adev->dev, "Bad Product Info Area checksum: 0x%02x",
+ csum);
+ kfree(pia);
+ return -EIO;
+ }
+
+ /* Now extract useful information from the PIA.
+ *
+ * Read the Manufacturer Name field, whose type/length byte is at offset 3.
+ */
+ addr = 3;
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->manufacturer_name, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->manufacturer_name),
+ pia[addr] & 0x3F));
+ fru_info->manufacturer_name[sizeof(fru_info->manufacturer_name) - 1] =
+ '\0';
+
+ /* Read Product Name field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->product_name, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->product_name), pia[addr] & 0x3F));
+ fru_info->product_name[sizeof(fru_info->product_name) - 1] = '\0';
+
+ /* Go to the Product Part/Model Number field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->product_number, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->product_number),
+ pia[addr] & 0x3F));
+ fru_info->product_number[sizeof(fru_info->product_number) - 1] = '\0';
+
+ /* Go to the Product Version field. */
+ addr += 1 + (pia[addr] & 0x3F);
+
+ /* Go to the Product Serial Number field. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if (addr + 1 >= len)
+ goto Out;
+ memcpy(fru_info->serial, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->serial), pia[addr] & 0x3F));
+ fru_info->serial[sizeof(fru_info->serial) - 1] = '\0';
+
+ /* Asset Tag field */
+ addr += 1 + (pia[addr] & 0x3F);
+
+ /* FRU File Id field. This could be 'null'. */
+ addr += 1 + (pia[addr] & 0x3F);
+ if ((addr + 1 >= len) || !(pia[addr] & 0x3F))
+ goto Out;
+ memcpy(fru_info->fru_id, pia + addr + 1,
+ min_t(size_t, sizeof(fru_info->fru_id), pia[addr] & 0x3F));
+ fru_info->fru_id[sizeof(fru_info->fru_id) - 1] = '\0';
+
+Out:
+ kfree(pia);
+ return 0;
+}
+
+/**
+ * DOC: product_name
+ *
+ * The amdgpu driver provides a sysfs API for reporting the product name
+ * for the device.
+ * The file product_name is used for this and returns the product name
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_product_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->product_name);
+}
+
+static DEVICE_ATTR(product_name, 0444, amdgpu_fru_product_name_show, NULL);
+
+/**
+ * DOC: product_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the part number
+ * for the device.
+ * The file product_number is used for this and returns the part number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_product_number_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->product_number);
+}
+
+static DEVICE_ATTR(product_number, 0444, amdgpu_fru_product_number_show, NULL);
+
+/**
+ * DOC: serial_number
+ *
+ * The amdgpu driver provides a sysfs API for reporting the serial number
+ * for the device.
+ * The file serial_number is used for this and returns the serial number
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_serial_number_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->serial);
+}
+
+static DEVICE_ATTR(serial_number, 0444, amdgpu_fru_serial_number_show, NULL);
+
+/**
+ * DOC: fru_id
+ *
+ * The amdgpu driver provides a sysfs API for reporting FRU File Id
+ * for the device.
+ * The file fru_id is used for this and returns the File Id value
+ * as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->fru_id);
+}
+
+static DEVICE_ATTR(fru_id, 0444, amdgpu_fru_id_show, NULL);
+
+/**
+ * DOC: manufacturer
+ *
+ * The amdgpu driver provides a sysfs API for reporting manufacturer name from
+ * FRU information.
+ * The file manufacturer returns the value as returned from the FRU.
+ * NOTE: This is only available for certain server cards
+ */
+
+static ssize_t amdgpu_fru_manufacturer_name_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%s\n", adev->fru_info->manufacturer_name);
+}
+
+static DEVICE_ATTR(manufacturer, 0444, amdgpu_fru_manufacturer_name_show, NULL);
+
+static const struct attribute *amdgpu_fru_attributes[] = {
+ &dev_attr_product_name.attr,
+ &dev_attr_product_number.attr,
+ &dev_attr_serial_number.attr,
+ &dev_attr_fru_id.attr,
+ &dev_attr_manufacturer.attr,
+ NULL
+};
+
+int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info)
+ return 0;
+
+ return sysfs_create_files(&adev->dev->kobj, amdgpu_fru_attributes);
+}
+
+void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->fru_info)
+ return;
+
+ sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
+}
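Two IPMI FRU conventions drive the parser above: every area must checksum to zero mod 256, and each Product Info Area field begins with a type/length byte whose low 6 bits give the payload length, which is why the walk repeatedly advances by 1 + (pia[addr] & 0x3F). A standalone sketch of those two rules (helper names are illustrative):

#include <stddef.h>
#include <stdint.h>

/* An IPMI FRU area is valid when all of its bytes sum to zero mod 256. */
static int fru_area_checksum_ok(const uint8_t *area, size_t len)
{
	uint8_t csum = 0;
	size_t i;

	for (i = 0; i < len; i++)
		csum += area[i];
	return csum == 0;
}

/*
 * Skip one type/length-prefixed field: the low 6 bits of the leading
 * byte encode the payload length, so the next field starts
 * 1 + length bytes further on.
 */
static size_t fru_next_field(const uint8_t *pia, size_t offset)
{
	return offset + 1 + (pia[offset] & 0x3F);
}
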
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
new file mode 100644
index 000000000000..98f3196599ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_FRU_EEPROM_H__
+#define __AMDGPU_FRU_EEPROM_H__
+
+#define AMDGPU_PRODUCT_NAME_LEN 64
+
+/* FRU product information */
+struct amdgpu_fru_info {
+ char product_number[20];
+ char product_name[AMDGPU_PRODUCT_NAME_LEN];
+ char serial[20];
+ char manufacturer_name[32];
+ char fru_id[50];
+};
+
+int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
+int amdgpu_fru_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev);
+
+#endif // __AMDGPU_FRU_EEPROM_H__
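The DOC blocks in amdgpu_fru_eeprom.c above describe read-only sysfs attributes created next to the PCI device. A hedged userspace sketch of reading one of them (the card0 path is an assumption; the attribute sits in the device directory of whichever card carries the FRU):

#include <stdio.h>

int main(void)
{
	char name[64];	/* matches AMDGPU_PRODUCT_NAME_LEN above */
	/* card index is an assumption; pick the card exposing the FRU */
	FILE *f = fopen("/sys/class/drm/card0/device/product_name", "r");

	if (!f)
		return 1;
	if (fgets(name, sizeof(name), f))
		printf("product_name: %s", name);
	fclose(f);
	return 0;
}
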
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
new file mode 100644
index 000000000000..328a1b963548
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/firmware.h>
+#include <linux/dma-mapping.h>
+
+#include "amdgpu.h"
+#include "amdgpu_fw_attestation.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+
+#define FW_ATTESTATION_DB_COOKIE 0x143b6a37
+#define FW_ATTESTATION_RECORD_VALID 1
+#define FW_ATTESTATION_MAX_SIZE 4096
+
+struct FW_ATT_DB_HEADER {
+ uint32_t AttDbVersion; /* version of the fwar feature */
+ uint32_t AttDbCookie; /* cookie as an extra check for corrupt data */
+};
+
+struct FW_ATT_RECORD {
+ uint16_t AttFwIdV1; /* Legacy FW Type field */
+ uint16_t AttFwIdV2; /* V2 FW ID field */
+ uint32_t AttFWVersion; /* FW Version */
+ uint16_t AttFWActiveFunctionID; /* The VF ID (only in VF Attestation Table) */
+ uint8_t AttSource; /* FW source indicator */
+ uint8_t RecordValid; /* Indicates whether the record is a valid entry */
+ uint32_t AttFwTaId; /* Ta ID (only in TA Attestation Table) */
+};
+
+static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f,
+ char __user *buf,
+ size_t size,
+ loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ uint64_t records_addr = 0;
+ uint64_t vram_pos = 0;
+ struct FW_ATT_DB_HEADER fw_att_hdr = {0};
+ struct FW_ATT_RECORD fw_att_record = {0};
+
+ if (size < sizeof(struct FW_ATT_RECORD)) {
+ DRM_WARN("FW attestation input buffer not enough memory");
+ return -EINVAL;
+ }
+
+ if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) {
+ DRM_WARN("FW attestation out of bounds");
+ return 0;
+ }
+
+ if (psp_get_fw_attestation_records_addr(&adev->psp, &records_addr)) {
+ DRM_WARN("Failed to get FW attestation record address");
+ return -EINVAL;
+ }
+
+ vram_pos = records_addr - adev->gmc.vram_start;
+
+ if (*pos == 0) {
+ amdgpu_device_vram_access(adev,
+ vram_pos,
+ (uint32_t *)&fw_att_hdr,
+ sizeof(struct FW_ATT_DB_HEADER),
+ false);
+
+ if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) {
+ DRM_WARN("Invalid FW attestation cookie");
+ return -EINVAL;
+ }
+
+ DRM_INFO("FW attestation version = 0x%X", fw_att_hdr.AttDbVersion);
+ }
+
+ amdgpu_device_vram_access(adev,
+ vram_pos + sizeof(struct FW_ATT_DB_HEADER) + *pos,
+ (uint32_t *)&fw_att_record,
+ sizeof(struct FW_ATT_RECORD),
+ false);
+
+ if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID)
+ return 0;
+
+ if (copy_to_user(buf, (void *)&fw_att_record, sizeof(struct FW_ATT_RECORD)))
+ return -EINVAL;
+
+ *pos += sizeof(struct FW_ATT_RECORD);
+
+ return sizeof(struct FW_ATT_RECORD);
+}
+
+static const struct file_operations amdgpu_fw_attestation_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_fw_attestation_debugfs_read,
+ .write = NULL,
+ .llseek = default_llseek
+};
+
+static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3))
+ return 0;
+
+ if (adev->asic_type >= CHIP_SIENNA_CICHLID)
+ return 1;
+
+ return 0;
+}
+
+void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev)
+{
+ if (!amdgpu_is_fw_attestation_supported(adev))
+ return;
+
+ debugfs_create_file("amdgpu_fw_attestation",
+ 0400,
+ adev_to_drm(adev)->primary->debugfs_root,
+ adev,
+ &amdgpu_fw_attestation_debugfs_ops);
+}
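amdgpu_fw_attestation_debugfs_read() returns exactly one 16-byte record per read() and 0 once it hits an invalid record, so a consumer just loops until a short read. A hedged userspace sketch (the struct mirrors the layout of FW_ATT_RECORD above; field names and DRI minor 0 are assumptions for illustration):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Field names are illustrative; the layout mirrors FW_ATT_RECORD. */
struct fw_att_record {
	uint16_t fw_id_v1;
	uint16_t fw_id_v2;
	uint32_t fw_version;
	uint16_t active_function_id;
	uint8_t  source;
	uint8_t  record_valid;
	uint32_t fw_ta_id;
};

int main(void)
{
	struct fw_att_record rec;
	int fd = open("/sys/kernel/debug/dri/0/amdgpu_fw_attestation", O_RDONLY);

	if (fd < 0)
		return 1;
	/* each read() hands back one record until the table is exhausted */
	while (read(fd, &rec, sizeof(rec)) == (ssize_t)sizeof(rec))
		printf("fw id 0x%04x version 0x%08x\n",
		       rec.fw_id_v2, rec.fw_version);
	close(fd);
	return 0;
}
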
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h
new file mode 100644
index 000000000000..90af4fe58c99
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef _AMDGPU_FW_ATTESTATION_H
+#define _AMDGPU_FW_ATTESTATION_H
+
+#include "amdgpu.h"
+
+void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
new file mode 100644
index 000000000000..d2237ce9da70
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include <drm/amdgpu_drm.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
+#include "amdgpu.h"
+#include "amdgpu_reset.h"
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+
+/*
+ * GART
+ * The GART (Graphics Aperture Remapping Table) is an aperture
+ * in the GPU's address space. System pages can be mapped into
+ * the aperture and look like contiguous pages from the GPU's
+ * perspective. A page table maps the pages in the aperture
+ * to the actual backing pages in system memory.
+ *
+ * Radeon GPUs support both an internal GART, as described above,
+ * and AGP. AGP works similarly, but the GART table is configured
+ * and maintained by the northbridge rather than the driver.
+ * Radeon hw has a separate AGP aperture that is programmed to
+ * point to the AGP aperture provided by the northbridge and the
+ * requests are passed through to the northbridge aperture.
+ * Both AGP and internal GART can be used at the same time, however
+ * that is not currently supported by the driver.
+ *
+ * This file handles the common internal GART management.
+ */
+
+/*
+ * Common GART table functions.
+ */
+
+/**
+ * amdgpu_gart_dummy_page_init - init dummy page used by the driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the dummy page used by the driver (all asics).
+ * This dummy page is used by the driver as a filler for gart entries
+ * when pages are taken out of the GART.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
+{
+ struct page *dummy_page = ttm_glob.dummy_read_page;
+
+ if (adev->dummy_page_addr)
+ return 0;
+ adev->dummy_page_addr = dma_map_page_attrs(&adev->pdev->dev, dummy_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {
+ dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
+ adev->dummy_page_addr = 0;
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the dummy page used by the driver (all asics).
+ */
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
+{
+ if (!adev->dummy_page_addr)
+ return;
+ dma_unmap_page_attrs(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ adev->dummy_page_addr = 0;
+}
+
+/**
+ * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate system memory for GART page table for ASICs that don't have
+ * dedicated VRAM.
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev)
+{
+ unsigned int order = get_order(adev->gart.table_size);
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO;
+ struct amdgpu_bo *bo = NULL;
+ struct sg_table *sg = NULL;
+ struct amdgpu_bo_param bp;
+ dma_addr_t dma_addr;
+ struct page *p;
+ unsigned long x;
+ int ret;
+
+ if (adev->gart.bo != NULL)
+ return 0;
+
+ p = alloc_pages(gfp_flags, order);
+ if (!p)
+ return -ENOMEM;
+
+ /* assign pages to this device */
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = adev->mman.bdev.dev_mapping;
+
+ /* If the hardware does not support UTCL2 snooping of the CPU caches
+ * then set_memory_wc() could be used as a workaround to mark the pages
+ * as write combine memory.
+ */
+ dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&adev->pdev->dev, dma_addr)) {
+ dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n");
+ __free_pages(p, order);
+ p = NULL;
+ return -EFAULT;
+ }
+
+ dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr);
+ /* Create SG table */
+ sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ ret = sg_alloc_table(sg, 1, GFP_KERNEL);
+ if (ret)
+ goto error;
+
+ sg_dma_address(sg->sgl) = dma_addr;
+ sg->sgl->length = adev->gart.table_size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = adev->gart.table_size;
+#endif
+ /* Create SG BO */
+ memset(&bp, 0, sizeof(bp));
+ bp.size = adev->gart.table_size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.flags = 0;
+ ret = amdgpu_bo_create(adev, &bp, &bo);
+ if (ret)
+ goto error;
+
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret);
+ goto error;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ WARN(ret, "Pinning the GART table failed");
+ if (ret)
+ goto error_resv;
+
+ adev->gart.bo = bo;
+ adev->gart.ptr = page_to_virt(p);
+ /* Make GART table accessible in VMID0 */
+ ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo);
+ if (ret)
+ amdgpu_gart_table_ram_free(adev);
+ amdgpu_bo_unreserve(bo);
+
+ return 0;
+
+error_resv:
+ amdgpu_bo_unreserve(bo);
+error:
+ amdgpu_bo_unref(&bo);
+ if (sg) {
+ sg_free_table(sg);
+ kfree(sg);
+ }
+ __free_pages(p, order);
+ return ret;
+}
+
+/**
+ * amdgpu_gart_table_ram_free - free gart page table system ram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the system memory used for the GART page table on ASICs that don't
+ * have dedicated VRAM.
+ */
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev)
+{
+ unsigned int order = get_order(adev->gart.table_size);
+ struct sg_table *sg = adev->gart.bo->tbo.sg;
+ struct page *p;
+ unsigned long x;
+ int ret;
+
+ ret = amdgpu_bo_reserve(adev->gart.bo, false);
+ if (!ret) {
+ amdgpu_bo_unpin(adev->gart.bo);
+ amdgpu_bo_unreserve(adev->gart.bo);
+ }
+ amdgpu_bo_unref(&adev->gart.bo);
+ sg_free_table(sg);
+ kfree(sg);
+ p = virt_to_page(adev->gart.ptr);
+ for (x = 0; x < (1UL << order); x++)
+ p[x].mapping = NULL;
+ __free_pages(p, order);
+
+ adev->gart.ptr = NULL;
+}
+
+/**
+ * amdgpu_gart_table_vram_alloc - allocate vram for gart page table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate video memory for GART page table
+ * (pcie r4xx, r5xx+). These asics require the
+ * gart table to be in video memory.
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
+{
+ if (adev->gart.bo != NULL)
+ return 0;
+
+ return amdgpu_bo_create_kernel(adev, adev->gart.table_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->gart.bo,
+ NULL, (void *)&adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_table_vram_free - free gart page table vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the video memory used for the GART page table
+ * (pcie r4xx, r5xx+). These asics require the gart table to
+ * be in video memory.
+ */
+void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gart.bo, NULL, (void *)&adev->gart.ptr);
+}
+
+/*
+ * Common gart functions.
+ */
+/**
+ * amdgpu_gart_unbind - unbind pages from the gart page table
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to unbind
+ *
+ * Unbinds the requested pages from the gart page table and
+ * replaces them with the dummy page (all asics).
+ */
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+ int pages)
+{
+ unsigned t;
+ int i, j;
+ u64 page_base;
+ /* Starting from VEGA10, system bit must be 0 to mean invalid. */
+ uint64_t flags = 0;
+ int idx;
+
+ if (!adev->gart.ptr)
+ return;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ t = offset / AMDGPU_GPU_PAGE_SIZE;
+ for (i = 0; i < pages; i++) {
+ page_base = adev->dummy_page_addr;
+ if (!adev->gart.ptr)
+ continue;
+
+ for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
+ amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+ t, page_base, flags);
+ page_base += AMDGPU_GPU_PAGE_SIZE;
+ }
+ }
+ amdgpu_gart_invalidate_tlb(adev);
+
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gart_map - map dma_addresses into GART entries
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to bind
+ * @dma_addr: DMA addresses of pages
+ * @flags: page table entry flags
+ * @dst: CPU address of the gart table
+ *
+ * Map the dma_addresses into GART entries (all asics).
+ */
+void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags,
+ void *dst)
+{
+ uint64_t page_base;
+ unsigned i, j, t;
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ t = offset / AMDGPU_GPU_PAGE_SIZE;
+
+ for (i = 0; i < pages; i++) {
+ page_base = dma_addr[i];
+ for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
+ amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
+ page_base += AMDGPU_GPU_PAGE_SIZE;
+ }
+ }
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gart_map_vram_range - map VRAM pages into the GART page table
+ *
+ * @adev: amdgpu_device pointer
+ * @pa: physical address of the first page to be mapped
+ * @start_page: first page to map in the GART aperture
+ * @num_pages: number of pages to be mapped
+ * @flags: page table entry flags
+ * @dst: CPU address of the GART table
+ *
+ * Binds a BO that is allocated in VRAM to the GART page table
+ * (all ASICs).
+ *
+ * Useful when a kernel BO is located in VRAM but
+ * needs to be accessed from the GART address space.
+ */
+void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
+ uint64_t start_page, uint64_t num_pages,
+ uint64_t flags, void *dst)
+{
+ u32 i, idx;
+
+ /* The SYSTEM flag indicates the pages aren't in VRAM. */
+ WARN_ON_ONCE(flags & AMDGPU_PTE_SYSTEM);
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ for (i = 0; i < num_pages; ++i) {
+ amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+ start_page + i, pa + AMDGPU_GPU_PAGE_SIZE * i, flags);
+ }
+
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gart_bind - bind pages into the gart page table
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of pages to bind
+ * @dma_addr: DMA addresses of pages
+ * @flags: page table entry flags
+ *
+ * Binds the requested pages to the gart page table
+ * (all asics).
+ */
+void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr,
+ uint64_t flags)
+{
+ if (!adev->gart.ptr)
+ return;
+
+ amdgpu_gart_map(adev, offset, pages, dma_addr, flags, adev->gart.ptr);
+}
+
+/**
+ * amdgpu_gart_invalidate_tlb - invalidate gart TLB
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Invalidate the gart TLB, which can be used as a way to flush gart changes.
+ *
+ */
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gart.ptr)
+ return;
+
+ mb();
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_device_flush_hdp(adev, NULL);
+ up_read(&adev->reset_domain->sem);
+ }
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+}
+
+/**
+ * amdgpu_gart_init - init the driver info for managing the gart
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the dummy page and init the gart driver info (all asics).
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->dummy_page_addr)
+ return 0;
+
+ /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
+ if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("Page size is smaller than GPU page size!\n");
+ return -EINVAL;
+ }
+ r = amdgpu_gart_dummy_page_init(adev);
+ if (r)
+ return r;
+ /* Compute table size */
+ adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
+ adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
+ DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
+ adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
+
+ return 0;
+}
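amdgpu_gart_map() converts the byte offset into the aperture into a starting page-table entry (offset / AMDGPU_GPU_PAGE_SIZE) and then writes one entry per 4 KiB GPU page, so a single CPU page yields AMDGPU_GPU_PAGES_IN_CPU_PAGE entries. A standalone sketch of that indexing, assuming a hypothetical set_pte callback in place of amdgpu_gmc_set_pte_pde():

#include <stdint.h>

#define GPU_PAGE_SIZE		4096ull
#define CPU_PAGE_SIZE		4096ull		/* assumption; PAGE_SIZE may be larger */
#define GPU_PAGES_PER_CPU_PAGE	(CPU_PAGE_SIZE / GPU_PAGE_SIZE)

typedef void (*set_pte_fn)(void *table, unsigned int idx, uint64_t addr);

/*
 * Mirror of the amdgpu_gart_map() indexing: each CPU page is split into
 * GPU-sized pages and every GPU page gets its own table entry.
 */
static void gart_map_sketch(void *table, uint64_t offset, int pages,
			    const uint64_t *dma_addr, set_pte_fn set_pte)
{
	unsigned int t = offset / GPU_PAGE_SIZE;
	int i, j;

	for (i = 0; i < pages; i++) {
		uint64_t page_base = dma_addr[i];

		for (j = 0; j < (int)GPU_PAGES_PER_CPU_PAGE; j++, t++) {
			set_pte(table, t, page_base);
			page_base += GPU_PAGE_SIZE;
		}
	}
}
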
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
new file mode 100644
index 000000000000..d3118275ddae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_GART_H__
+#define __AMDGPU_GART_H__
+
+#include <linux/types.h>
+
+/*
+ * GART structures, functions & helpers
+ */
+struct amdgpu_device;
+struct amdgpu_bo;
+
+#define AMDGPU_GPU_PAGE_SIZE 4096
+#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
+#define AMDGPU_GPU_PAGE_SHIFT 12
+#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)
+
+#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
+
+struct amdgpu_gart {
+ struct amdgpu_bo *bo;
+ /* CPU kmapped address of gart table */
+ void *ptr;
+ unsigned num_gpu_pages;
+ unsigned num_cpu_pages;
+ unsigned table_size;
+
+ /* Asic default pte flags */
+ uint64_t gart_pte_flags;
+};
+
+int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_ram_free(struct amdgpu_device *adev);
+int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
+void amdgpu_gart_table_vram_free(struct amdgpu_device *adev);
+int amdgpu_gart_init(struct amdgpu_device *adev);
+void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+ int pages);
+void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags,
+ void *dst);
+void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags);
+void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
+ uint64_t start_page, uint64_t num_pages,
+ uint64_t flags, void *dst);
+void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev);
+#endif
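AMDGPU_GPU_PAGE_ALIGN() above rounds a size up to the next 4 KiB boundary with the usual add-the-mask-then-clear trick. A quick self-contained check of the same expression:

#include <assert.h>
#include <stdint.h>

#define GPU_PAGE_SIZE	4096ull
#define GPU_PAGE_MASK	(GPU_PAGE_SIZE - 1)
#define GPU_PAGE_ALIGN(a) (((a) + GPU_PAGE_MASK) & ~GPU_PAGE_MASK)

int main(void)
{
	assert(GPU_PAGE_ALIGN(1) == 4096);
	assert(GPU_PAGE_ALIGN(4096) == 4096);	/* already aligned: unchanged */
	assert(GPU_PAGE_ALIGN(4097) == 8192);
	return 0;
}
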
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
new file mode 100644
index 000000000000..f6ac1e9548f2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_GDS_H__
+#define __AMDGPU_GDS_H__
+
+struct amdgpu_ring;
+struct amdgpu_bo;
+
+struct amdgpu_gds {
+ uint32_t gds_size;
+ uint32_t gws_size;
+ uint32_t oa_size;
+ uint32_t gds_compute_max_wave_id;
+};
+
+struct amdgpu_gds_reg_offset {
+ uint32_t mem_base;
+ uint32_t mem_size;
+ uint32_t gws;
+ uint32_t oa;
+};
+
+#endif /* __AMDGPU_GDS_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
new file mode 100644
index 000000000000..3e38c5db2987
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -0,0 +1,1304 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#include <linux/ktime.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_gem_ttm_helper.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_syncobj.h>
+
+#include "amdgpu.h"
+#include "amdgpu_display.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
+
+static int
+amdgpu_gem_add_input_fence(struct drm_file *filp,
+ uint64_t syncobj_handles_array,
+ uint32_t num_syncobj_handles)
+{
+ struct dma_fence *fence;
+ uint32_t *syncobj_handles;
+ int ret, i;
+
+ if (!num_syncobj_handles)
+ return 0;
+
+ syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array),
+ size_mul(sizeof(uint32_t), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ for (i = 0; i < num_syncobj_handles; i++) {
+
+ if (!syncobj_handles[i]) {
+ ret = -EINVAL;
+ goto free_memdup;
+ }
+
+ ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
+ if (ret)
+ goto free_memdup;
+
+ dma_fence_wait(fence, false);
+
+ /* TODO: optimize async handling */
+ dma_fence_put(fence);
+ }
+
+free_memdup:
+ kfree(syncobj_handles);
+ return ret;
+}
+
+static int
+amdgpu_gem_update_timeline_node(struct drm_file *filp,
+ uint32_t syncobj_handle,
+ uint64_t point,
+ struct drm_syncobj **syncobj,
+ struct dma_fence_chain **chain)
+{
+ if (!syncobj_handle)
+ return 0;
+
+ /* Find the sync object */
+ *syncobj = drm_syncobj_find(filp, syncobj_handle);
+ if (!*syncobj)
+ return -ENOENT;
+
+ if (!point)
+ return 0;
+
+ /* Allocate the chain node */
+ *chain = dma_fence_chain_alloc();
+ if (!*chain) {
+ drm_syncobj_put(*syncobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+amdgpu_gem_update_bo_mapping(struct drm_file *filp,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation,
+ uint64_t point,
+ struct dma_fence *fence,
+ struct drm_syncobj *syncobj,
+ struct dma_fence_chain *chain)
+{
+ struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct dma_fence *last_update;
+
+ if (!syncobj)
+ return;
+
+ /* Find the last update fence */
+ switch (operation) {
+ case AMDGPU_VA_OP_MAP:
+ case AMDGPU_VA_OP_REPLACE:
+ if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
+ last_update = vm->last_update;
+ else
+ last_update = bo_va->last_pt_update;
+ break;
+ case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ last_update = fence;
+ break;
+ default:
+ return;
+ }
+
+ /* Add fence to timeline */
+ if (!point)
+ drm_syncobj_replace_fence(syncobj, last_update);
+ else
+ drm_syncobj_add_point(syncobj, chain, last_update, point);
+}
+
+static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
+{
+ struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
+ struct drm_device *ddev = bo->base.dev;
+ vm_fault_t ret;
+ int idx;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ if (drm_dev_enter(ddev, &idx)) {
+ ret = amdgpu_bo_fault_reserve_notify(bo);
+ if (ret) {
+ drm_dev_exit(idx);
+ goto unlock;
+ }
+
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ }
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+unlock:
+ dma_resv_unlock(bo->base.resv);
+ return ret;
+}
+
+static const struct vm_operations_struct amdgpu_gem_vm_ops = {
+ .fault = amdgpu_gem_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close,
+ .access = ttm_bo_vm_access
+};
+
+static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
+{
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
+
+ amdgpu_hmm_unregister(aobj);
+ ttm_bo_fini(&aobj->tbo);
+}
+
+int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
+ int alignment, u32 initial_domain,
+ u64 flags, enum ttm_bo_type type,
+ struct dma_resv *resv,
+ struct drm_gem_object **obj, int8_t xcp_id_plus1)
+{
+ struct amdgpu_bo *bo;
+ struct amdgpu_bo_user *ubo;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ *obj = NULL;
+ flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
+
+ bp.size = size;
+ bp.byte_align = alignment;
+ bp.type = type;
+ bp.resv = resv;
+ bp.preferred_domain = initial_domain;
+ bp.flags = flags;
+ bp.domain = initial_domain;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.xcp_id_plus1 = xcp_id_plus1;
+
+ r = amdgpu_bo_create_user(adev, &bp, &ubo);
+ if (r)
+ return r;
+
+ bo = &ubo->bo;
+ *obj = &bo->tbo.base;
+
+ return 0;
+}
+
+void amdgpu_gem_force_release(struct amdgpu_device *adev)
+{
+ struct drm_device *ddev = adev_to_drm(adev);
+ struct drm_file *file;
+
+ mutex_lock(&ddev->filelist_mutex);
+
+ list_for_each_entry(file, &ddev->filelist, lhead) {
+ struct drm_gem_object *gobj;
+ int handle;
+
+ WARN_ONCE(1, "Still active user space clients!\n");
+ spin_lock(&file->table_lock);
+ idr_for_each_entry(&file->object_idr, gobj, handle) {
+ WARN_ONCE(1, "And also active allocations!\n");
+ drm_gem_object_put(gobj);
+ }
+ idr_destroy(&file->object_idr);
+ spin_unlock(&file->table_lock);
+ }
+
+ mutex_unlock(&ddev->filelist_mutex);
+}
+
+/*
+ * Called from drm_gem_handle_create, which appears in both the new and
+ * open ioctl cases.
+ */
+static int amdgpu_gem_object_open(struct drm_gem_object *obj,
+ struct drm_file *file_priv)
+{
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_va *bo_va;
+ struct mm_struct *mm;
+ int r;
+
+ mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
+ if (mm && mm != current->mm)
+ return -EPERM;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
+ !amdgpu_vm_is_bo_always_valid(vm, abo))
+ return -EPERM;
+
+ r = amdgpu_bo_reserve(abo, false);
+ if (r)
+ return r;
+
+ amdgpu_vm_bo_update_shared(abo);
+ bo_va = amdgpu_vm_bo_find(vm, abo);
+ if (!bo_va)
+ bo_va = amdgpu_vm_bo_add(adev, vm, abo);
+ else
+ ++bo_va->ref_count;
+
+ /* attach gfx eviction fence */
+ r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
+ if (r) {
+ DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
+ amdgpu_bo_unreserve(abo);
+ return r;
+ }
+
+ amdgpu_bo_unreserve(abo);
+
+ /* Validate and add eviction fence to DMABuf imports with dynamic
+ * attachment in compute VMs. Re-validation will be done by
+ * amdgpu_vm_validate. Fences are on the reservation shared with the
+ * export, which is currently required to be validated and fenced
+ * already by amdgpu_amdkfd_gpuvm_restore_process_bos.
+ *
+ * Nested locking below for the case that a GEM object is opened in
+ * kfd_mem_export_dmabuf. Since the lock below is only taken for imports,
+ * but not for export, this is a different lock class that cannot lead to
+ * circular lock dependencies.
+ */
+ if (!vm->is_compute_context || !vm->process_info)
+ return 0;
+ if (!drm_gem_is_imported(obj) ||
+ !dma_buf_is_dynamic(obj->import_attach->dmabuf))
+ return 0;
+ mutex_lock_nested(&vm->process_info->lock, 1);
+ if (!WARN_ON(!vm->process_info->eviction_fence)) {
+ r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT,
+ &vm->process_info->eviction_fence->base);
+ if (r) {
+ struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm);
+
+ dev_warn(adev->dev, "validate_and_fence failed: %d\n", r);
+ if (ti) {
+ dev_warn(adev->dev, "pid %d\n", ti->task.pid);
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+ }
+ mutex_unlock(&vm->process_info->lock);
+
+ return r;
+}
+
+static void amdgpu_gem_object_close(struct drm_gem_object *obj,
+ struct drm_file *file_priv)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ struct dma_fence *fence = NULL;
+ struct amdgpu_bo_va *bo_va;
+ struct drm_exec exec;
+ long r;
+
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto out_unlock;
+ }
+
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
+ amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
+
+ bo_va = amdgpu_vm_bo_find(vm, bo);
+ if (!bo_va || --bo_va->ref_count)
+ goto out_unlock;
+
+ amdgpu_vm_bo_del(adev, bo_va);
+ amdgpu_vm_bo_update_shared(bo);
+ if (!amdgpu_vm_ready(vm))
+ goto out_unlock;
+
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (unlikely(r < 0))
+ dev_err(adev->dev, "failed to clear page "
+ "tables on GEM object close (%ld)\n", r);
+ if (r || !fence)
+ goto out_unlock;
+
+ amdgpu_bo_fence(bo, fence, true);
+ dma_fence_put(fence);
+
+out_unlock:
+ if (r)
+ dev_err(adev->dev, "leaking bo va (%ld)\n", r);
+ drm_exec_fini(&exec);
+}
+
+static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+ return -EPERM;
+ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+ return -EPERM;
+
+ /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings
+ * for debugger access to invisible VRAM. Should have used MAP_SHARED
+ * instead. Clearing VM_MAYWRITE prevents the mapping from ever
+ * becoming writable and makes is_cow_mapping(vm_flags) false.
+ */
+ if (is_cow_mapping(vma->vm_flags) &&
+ !(vma->vm_flags & VM_ACCESS_FLAGS))
+ vm_flags_clear(vma, VM_MAYWRITE);
+
+ return drm_gem_ttm_mmap(obj, vma);
+}
+
+const struct drm_gem_object_funcs amdgpu_gem_object_funcs = {
+ .free = amdgpu_gem_object_free,
+ .open = amdgpu_gem_object_open,
+ .close = amdgpu_gem_object_close,
+ .export = amdgpu_gem_prime_export,
+ .vmap = drm_gem_ttm_vmap,
+ .vunmap = drm_gem_ttm_vunmap,
+ .mmap = amdgpu_gem_object_mmap,
+ .vm_ops = &amdgpu_gem_vm_ops,
+};
+
+/*
+ * GEM ioctls.
+ */
+int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ union drm_amdgpu_gem_create *args = data;
+ uint64_t flags = args->in.domain_flags;
+ uint64_t size = args->in.bo_size;
+ struct dma_resv *resv = NULL;
+ struct drm_gem_object *gobj;
+ uint32_t handle, initial_domain;
+ int r;
+
+ /* reject invalid gem flags */
+ if (flags & ~AMDGPU_GEM_CREATE_SETTABLE_MASK)
+ return -EINVAL;
+
+ /* reject invalid gem domains */
+ if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
+ return -EINVAL;
+
+ if (!amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) {
+ DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is disabled\n");
+ return -EINVAL;
+ }
+
+ /* always clear VRAM */
+ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
+ if (args->in.domains & AMDGPU_GEM_DOMAIN_MMIO_REMAP)
+ return -EINVAL;
+
+ /* create a gem object to contain this object in */
+ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS |
+ AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
+ if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+ /* if gds bo is created from user space, it must be
+ * passed to bo list
+ */
+ DRM_ERROR("GDS bo cannot be per-vm-bo\n");
+ return -EINVAL;
+ }
+ flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ }
+
+ if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ resv = vm->root.bo->tbo.base.resv;
+ }
+
+ initial_domain = (u32)(0xffffffff & args->in.domains);
+retry:
+ r = amdgpu_gem_object_create(adev, size, args->in.alignment,
+ initial_domain,
+ flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1);
+ if (r && r != -ERESTARTSYS) {
+ if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+ flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ goto retry;
+ }
+
+ if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+ goto retry;
+ }
+ DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
+ size, initial_domain, args->in.alignment, r);
+ }
+
+ if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+ if (!r) {
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
+
+ abo->parent = amdgpu_bo_ref(vm->root.bo);
+ }
+ amdgpu_bo_unreserve(vm->root.bo);
+ }
+ if (r)
+ return r;
+
+ r = drm_gem_handle_create(filp, gobj, &handle);
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(gobj);
+ if (r)
+ return r;
+
+ memset(args, 0, sizeof(*args));
+ args->out.handle = handle;
+ return 0;
+}
+
+int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_amdgpu_gem_userptr *args = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct drm_gem_object *gobj;
+ struct amdgpu_hmm_range *range;
+ struct amdgpu_bo *bo;
+ uint32_t handle;
+ int r;
+
+ args->addr = untagged_addr(args->addr);
+
+ if (offset_in_page(args->addr | args->size))
+ return -EINVAL;
+
+ /* reject unknown flag values */
+ if (args->flags & ~(AMDGPU_GEM_USERPTR_READONLY |
+ AMDGPU_GEM_USERPTR_ANONONLY | AMDGPU_GEM_USERPTR_VALIDATE |
+ AMDGPU_GEM_USERPTR_REGISTER))
+ return -EINVAL;
+
+ if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) &&
+ !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
+
+ /* if we want to write to it we must install a MMU notifier */
+ return -EACCES;
+ }
+
+ /* create a gem object to contain this object in */
+ r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
+ 0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
+ if (r)
+ return r;
+
+ bo = gem_to_amdgpu_bo(gobj);
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ r = amdgpu_ttm_tt_set_userptr(&bo->tbo, args->addr, args->flags);
+ if (r)
+ goto release_object;
+
+ r = amdgpu_hmm_register(bo, args->addr);
+ if (r)
+ goto release_object;
+
+ if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
+ range = amdgpu_hmm_range_alloc(NULL);
+ if (unlikely(!range))
+ return -ENOMEM;
+ r = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (r) {
+ amdgpu_hmm_range_free(range);
+ goto release_object;
+ }
+ r = amdgpu_bo_reserve(bo, true);
+ if (r)
+ goto user_pages_done;
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (r)
+ goto user_pages_done;
+ }
+
+ r = drm_gem_handle_create(filp, gobj, &handle);
+ if (r)
+ goto user_pages_done;
+
+ args->handle = handle;
+
+user_pages_done:
+ if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+ amdgpu_hmm_range_free(range);
+release_object:
+ drm_gem_object_put(gobj);
+
+ return r;
+}
+
+int amdgpu_mode_dumb_mmap(struct drm_file *filp,
+ struct drm_device *dev,
+ uint32_t handle, uint64_t *offset_p)
+{
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *robj;
+
+ gobj = drm_gem_object_lookup(filp, handle);
+ if (!gobj)
+ return -ENOENT;
+
+ robj = gem_to_amdgpu_bo(gobj);
+ if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
+ (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
+ drm_gem_object_put(gobj);
+ return -EPERM;
+ }
+ *offset_p = amdgpu_bo_mmap_offset(robj);
+ drm_gem_object_put(gobj);
+ return 0;
+}
+
+int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_gem_mmap *args = data;
+ uint32_t handle = args->in.handle;
+
+ memset(args, 0, sizeof(*args));
+ return amdgpu_mode_dumb_mmap(filp, dev, handle, &args->out.addr_ptr);
+}
+
+/**
+ * amdgpu_gem_timeout - calculate jiffies timeout from absolute value
+ *
+ * @timeout_ns: timeout in ns
+ *
+ * Calculate the timeout in jiffies from an absolute timeout in ns.
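+ *
+ * A timeout with the top bit set (negative when interpreted as signed)
+ * is treated as "wait forever" and returns MAX_SCHEDULE_TIMEOUT.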
+ */
+unsigned long amdgpu_gem_timeout(uint64_t timeout_ns)
+{
+ unsigned long timeout_jiffies;
+ ktime_t timeout;
+
+ /* clamp timeout if it's too large */
+ if (((int64_t)timeout_ns) < 0)
+ return MAX_SCHEDULE_TIMEOUT;
+
+ timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
+ if (ktime_to_ns(timeout) < 0)
+ return 0;
+
+ timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
+ /* clamp timeout to avoid unsigned -> signed overflow */
+ if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT)
+ return MAX_SCHEDULE_TIMEOUT - 1;
+
+ return timeout_jiffies;
+}
+
+int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_gem_wait_idle *args = data;
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *robj;
+ uint32_t handle = args->in.handle;
+ unsigned long timeout = amdgpu_gem_timeout(args->in.timeout);
+ int r = 0;
+ long ret;
+
+ gobj = drm_gem_object_lookup(filp, handle);
+ if (!gobj)
+ return -ENOENT;
+
+ robj = gem_to_amdgpu_bo(gobj);
+ ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
+ true, timeout);
+
+ /* ret == 0 means not signaled,
+ * ret > 0 means signaled
+ * ret < 0 means interrupted before timeout
+ */
+ if (ret >= 0) {
+ memset(args, 0, sizeof(*args));
+ args->out.status = (ret == 0);
+ } else
+ r = ret;
+
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_gem_metadata *args = data;
+ struct drm_gem_object *gobj;
+ struct amdgpu_bo *robj;
+ int r = -1;
+
+ DRM_DEBUG("%d\n", args->handle);
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (gobj == NULL)
+ return -ENOENT;
+ robj = gem_to_amdgpu_bo(gobj);
+
+ r = amdgpu_bo_reserve(robj, false);
+ if (unlikely(r != 0))
+ goto out;
+
+ if (args->op == AMDGPU_GEM_METADATA_OP_GET_METADATA) {
+ amdgpu_bo_get_tiling_flags(robj, &args->data.tiling_info);
+ r = amdgpu_bo_get_metadata(robj, args->data.data,
+ sizeof(args->data.data),
+ &args->data.data_size_bytes,
+ &args->data.flags);
+ } else if (args->op == AMDGPU_GEM_METADATA_OP_SET_METADATA) {
+ if (args->data.data_size_bytes > sizeof(args->data.data)) {
+ r = -EINVAL;
+ goto unreserve;
+ }
+ r = amdgpu_bo_set_tiling_flags(robj, args->data.tiling_info);
+ if (!r)
+ r = amdgpu_bo_set_metadata(robj, args->data.data,
+ args->data.data_size_bytes,
+ args->data.flags);
+ }
+
+unreserve:
+ amdgpu_bo_unreserve(robj);
+out:
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+/**
+ * amdgpu_gem_va_update_vm - update the bo_va in its VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm to update
+ * @bo_va: bo_va to update
+ * @operation: map, unmap or clear
+ *
+ * Update the bo_va directly after setting its address. Errors are not
+ * vital here, so they are not reported back to userspace.
+ *
+ * Returns the resulting fence if freed BO(s) were cleared from the PT,
+ * otherwise a stub fence in case of error.
+ */
+static struct dma_fence *
+amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation)
+{
+ struct dma_fence *fence = dma_fence_get_stub();
+ int r;
+
+ if (!amdgpu_vm_ready(vm))
+ return fence;
+
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (r)
+ goto error;
+
+ if (operation == AMDGPU_VA_OP_MAP ||
+ operation == AMDGPU_VA_OP_REPLACE) {
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ goto error;
+ }
+
+ r = amdgpu_vm_update_pdes(adev, vm, false);
+
+error:
+ if (r && r != -ERESTARTSYS)
+ DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
+
+ return fence;
+}
+
+int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE |
+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+ AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK |
+ AMDGPU_VM_PAGE_NOALLOC;
+ const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE |
+ AMDGPU_VM_PAGE_PRT;
+
+ struct drm_amdgpu_gem_va *args = data;
+ struct drm_gem_object *gobj;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_bo *abo;
+ struct amdgpu_bo_va *bo_va;
+ struct drm_syncobj *timeline_syncobj = NULL;
+ struct dma_fence_chain *timeline_chain = NULL;
+ struct dma_fence *fence;
+ struct drm_exec exec;
+ uint64_t vm_size;
+ int r = 0;
+
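+ /*
+ * Validate the requested range: it must not fall into the reserved
+ * bottom/top areas or the GMC VA hole, and the address is then
+ * canonicalized with AMDGPU_GMC_HOLE_MASK.
+ */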
+ if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) {
+ dev_dbg(dev->dev,
+ "va_address 0x%llx is in reserved area 0x%llx\n",
+ args->va_address, AMDGPU_VA_RESERVED_BOTTOM);
+ return -EINVAL;
+ }
+
+ if (args->va_address >= AMDGPU_GMC_HOLE_START &&
+ args->va_address < AMDGPU_GMC_HOLE_END) {
+ dev_dbg(dev->dev,
+ "va_address 0x%llx is in VA hole 0x%llx-0x%llx\n",
+ args->va_address, AMDGPU_GMC_HOLE_START,
+ AMDGPU_GMC_HOLE_END);
+ return -EINVAL;
+ }
+
+ args->va_address &= AMDGPU_GMC_HOLE_MASK;
+
+ vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_TOP;
+ if (args->va_address + args->map_size > vm_size) {
+ dev_dbg(dev->dev,
+ "va_address 0x%llx is in top reserved area 0x%llx\n",
+ args->va_address + args->map_size, vm_size);
+ return -EINVAL;
+ }
+
+ if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
+ dev_dbg(dev->dev, "invalid flags combination 0x%08X\n",
+ args->flags);
+ return -EINVAL;
+ }
+
+ switch (args->operation) {
+ case AMDGPU_VA_OP_MAP:
+ case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ case AMDGPU_VA_OP_REPLACE:
+ break;
+ default:
+ dev_dbg(dev->dev, "unsupported operation %d\n",
+ args->operation);
+ return -EINVAL;
+ }
+
+ if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
+ !(args->flags & AMDGPU_VM_PAGE_PRT)) {
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (gobj == NULL)
+ return -ENOENT;
+ abo = gem_to_amdgpu_bo(gobj);
+ } else {
+ gobj = NULL;
+ abo = NULL;
+ }
+
+ r = amdgpu_gem_add_input_fence(filp,
+ args->input_fence_syncobj_handles,
+ args->num_syncobj_handles);
+ if (r)
+ goto error_put_gobj;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ if (gobj) {
+ r = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ if (abo) {
+ bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo);
+ if (!bo_va) {
+ r = -ENOENT;
+ goto error;
+ }
+ } else if (args->operation != AMDGPU_VA_OP_CLEAR) {
+ bo_va = fpriv->prt_va;
+ } else {
+ bo_va = NULL;
+ }
+
+ r = amdgpu_gem_update_timeline_node(filp,
+ args->vm_timeline_syncobj_out,
+ args->vm_timeline_point,
+ &timeline_syncobj,
+ &timeline_chain);
+ if (r)
+ goto error;
+
+ switch (args->operation) {
+ case AMDGPU_VA_OP_MAP:
+ r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
+ args->offset_in_bo, args->map_size,
+ args->flags);
+ break;
+ case AMDGPU_VA_OP_UNMAP:
+ r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
+ break;
+
+ case AMDGPU_VA_OP_CLEAR:
+ r = amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm,
+ args->va_address,
+ args->map_size);
+ break;
+ case AMDGPU_VA_OP_REPLACE:
+ r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
+ args->offset_in_bo, args->map_size,
+ args->flags);
+ break;
+ default:
+ break;
+ }
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
+ fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+ args->operation);
+
+ if (timeline_syncobj)
+ amdgpu_gem_update_bo_mapping(filp, bo_va,
+ args->operation,
+ args->vm_timeline_point,
+ fence, timeline_syncobj,
+ timeline_chain);
+ else
+ dma_fence_put(fence);
+
+ }
+
+error:
+ drm_exec_fini(&exec);
+error_put_gobj:
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_gem_op *args = data;
+ struct drm_gem_object *gobj;
+ struct amdgpu_vm_bo_base *base;
+ struct amdgpu_bo *robj;
+ struct drm_exec exec;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ int r;
+
+ if (args->padding)
+ return -EINVAL;
+
+ gobj = drm_gem_object_lookup(filp, args->handle);
+ if (!gobj)
+ return -ENOENT;
+
+ robj = gem_to_amdgpu_bo(gobj);
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+ DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_lock_obj(&exec, gobj);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto out_exec;
+
+ if (args->op == AMDGPU_GEM_OP_GET_MAPPING_INFO) {
+ r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
+ drm_exec_retry_on_contention(&exec);
+ if (r)
+ goto out_exec;
+ }
+ }
+
+ switch (args->op) {
+ case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: {
+ struct drm_amdgpu_gem_create_in info;
+ void __user *out = u64_to_user_ptr(args->value);
+
+ info.bo_size = robj->tbo.base.size;
+ info.alignment = robj->tbo.page_alignment << PAGE_SHIFT;
+ info.domains = robj->preferred_domains;
+ info.domain_flags = robj->flags;
+ drm_exec_fini(&exec);
+ if (copy_to_user(out, &info, sizeof(info)))
+ r = -EFAULT;
+ break;
+ }
+ case AMDGPU_GEM_OP_SET_PLACEMENT:
+ if (drm_gem_is_imported(&robj->tbo.base) &&
+ args->value & AMDGPU_GEM_DOMAIN_VRAM) {
+ r = -EINVAL;
+ goto out_exec;
+ }
+ if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
+ r = -EPERM;
+ goto out_exec;
+ }
+ for (base = robj->vm_bo; base; base = base->next)
+ if (amdgpu_xgmi_same_hive(amdgpu_ttm_adev(robj->tbo.bdev),
+ amdgpu_ttm_adev(base->vm->root.bo->tbo.bdev))) {
+ r = -EINVAL;
+ goto out_exec;
+ }
+
+ robj->preferred_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
+ robj->allowed_domains = robj->preferred_domains;
+ if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+ robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
+
+ if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+ amdgpu_vm_bo_invalidate(robj, true);
+ drm_exec_fini(&exec);
+ break;
+ case AMDGPU_GEM_OP_GET_MAPPING_INFO: {
+ struct amdgpu_bo_va *bo_va = amdgpu_vm_bo_find(&fpriv->vm, robj);
+ struct drm_amdgpu_gem_vm_entry *vm_entries;
+ struct amdgpu_bo_va_mapping *mapping;
+ int num_mappings = 0;
+ /*
+ * On input, num_entries is the size of the user-allocated array of
+ * drm_amdgpu_gem_vm_entry stored at args->value. On output, it is the
+ * number of mappings the BO has. If that number is larger than the
+ * array, the ioctl must be retried.
+ */
+ vm_entries = kvcalloc(args->num_entries, sizeof(*vm_entries), GFP_KERNEL);
+ if (!vm_entries) {
+ r = -ENOMEM;
+ goto out_exec;
+ }
+
+ amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_entries) {
+ vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].offset = mapping->offset;
+ vm_entries[num_mappings].flags = mapping->flags;
+ }
+ num_mappings += 1;
+ }
+
+ amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) {
+ if (num_mappings < args->num_entries) {
+ vm_entries[num_mappings].addr = mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].size = (mapping->last - mapping->start + 1) * AMDGPU_GPU_PAGE_SIZE;
+ vm_entries[num_mappings].offset = mapping->offset;
+ vm_entries[num_mappings].flags = mapping->flags;
+ }
+ num_mappings += 1;
+ }
+
+ drm_exec_fini(&exec);
+
+ if (num_mappings > 0 && num_mappings <= args->num_entries)
+ if (copy_to_user(u64_to_user_ptr(args->value), vm_entries, num_mappings * sizeof(*vm_entries)))
+ r = -EFAULT;
+
+ args->num_entries = num_mappings;
+
+ kvfree(vm_entries);
+ break;
+ }
+ default:
+ drm_exec_fini(&exec);
+ r = -EINVAL;
+ }
+
+ drm_gem_object_put(gobj);
+ return r;
+out_exec:
+ drm_exec_fini(&exec);
+ drm_gem_object_put(gobj);
+ return r;
+}
+
+/**
+ * amdgpu_gem_list_handles_ioctl - get information about a process' buffer objects
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_gem_list_handles
+ * @filp: drm file pointer
+ *
+ * On input, num_entries is the size of the entries array. On output, it
+ * is the number of BOs in the process. If that number is larger than the
+ * array, the ioctl must be retried.
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct drm_amdgpu_gem_list_handles *args = data;
+ struct drm_amdgpu_gem_list_handles_entry *bo_entries;
+ struct drm_gem_object *gobj;
+ int id, ret = 0;
+ int bo_index = 0;
+ int num_bos = 0;
+
+ spin_lock(&filp->table_lock);
+ idr_for_each_entry(&filp->object_idr, gobj, id)
+ num_bos += 1;
+ spin_unlock(&filp->table_lock);
+
+ if (args->num_entries < num_bos) {
+ args->num_entries = num_bos;
+ return 0;
+ }
+
+ if (num_bos == 0) {
+ args->num_entries = 0;
+ return 0;
+ }
+
+ bo_entries = kvcalloc(num_bos, sizeof(*bo_entries), GFP_KERNEL);
+ if (!bo_entries)
+ return -ENOMEM;
+
+ spin_lock(&filp->table_lock);
+ idr_for_each_entry(&filp->object_idr, gobj, id) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct drm_amdgpu_gem_list_handles_entry *bo_entry;
+
+ if (bo_index >= num_bos) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ bo_entry = &bo_entries[bo_index];
+
+ bo_entry->size = amdgpu_bo_size(bo);
+ bo_entry->alloc_flags = bo->flags & AMDGPU_GEM_CREATE_SETTABLE_MASK;
+ bo_entry->preferred_domains = bo->preferred_domains;
+ bo_entry->gem_handle = id;
+ bo_entry->alignment = bo->tbo.page_alignment;
+
+ if (bo->tbo.base.import_attach)
+ bo_entry->flags |= AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT;
+
+ bo_index += 1;
+ }
+ spin_unlock(&filp->table_lock);
+
+ args->num_entries = bo_index;
+
+ if (!ret)
+ if (copy_to_user(u64_to_user_ptr(args->entries), bo_entries, num_bos * sizeof(*bo_entries)))
+ ret = -EFAULT;
+
+ kvfree(bo_entries);
+
+ return ret;
+}
+
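+/*
+ * Round the framebuffer width up to the per-cpp pitch alignment (in
+ * pixels) and return the resulting pitch in bytes.
+ */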
+static int amdgpu_gem_align_pitch(struct amdgpu_device *adev,
+ int width,
+ int cpp,
+ bool tiled)
+{
+ int aligned = width;
+ int pitch_mask = 0;
+
+ switch (cpp) {
+ case 1:
+ pitch_mask = 255;
+ break;
+ case 2:
+ pitch_mask = 127;
+ break;
+ case 3:
+ case 4:
+ pitch_mask = 63;
+ break;
+ }
+
+ aligned += pitch_mask;
+ aligned &= ~pitch_mask;
+ return aligned * cpp;
+}
+
+int amdgpu_mode_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct drm_gem_object *gobj;
+ uint32_t handle;
+ u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ u32 domain;
+ int r;
+
+ /*
+ * The buffer returned from this function should be cleared, but
+ * it can only be done if the ring is enabled or we'll fail to
+ * create the buffer.
+ */
+ if (adev->mman.buffer_funcs_enabled)
+ flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+
+ args->pitch = amdgpu_gem_align_pitch(adev, args->width,
+ DIV_ROUND_UP(args->bpp, 8), 0);
+ args->size = (u64)args->pitch * args->height;
+ args->size = ALIGN(args->size, PAGE_SIZE);
+ domain = amdgpu_bo_get_preferred_domain(adev,
+ amdgpu_display_supported_domains(adev, flags));
+ r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,
+ ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1);
+ if (r)
+ return -ENOMEM;
+
+ r = drm_gem_handle_create(file_priv, gobj, &handle);
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(gobj);
+ if (r)
+ return r;
+
+ args->handle = handle;
+ return 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_file *file;
+ int r;
+
+ r = mutex_lock_interruptible(&dev->filelist_mutex);
+ if (r)
+ return r;
+
+ list_for_each_entry(file, &dev->filelist, lhead) {
+ struct task_struct *task;
+ struct drm_gem_object *gobj;
+ struct pid *pid;
+ int id;
+
+ /*
+ * Although we have a valid reference on file->pid, that does
+ * not guarantee that the task_struct who called get_pid() is
+ * still alive (e.g. get_pid(current) => fork() => exit()).
+ * Therefore, we need to protect this ->comm access using RCU.
+ */
+ rcu_read_lock();
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
+ task ? task->comm : "<unknown>");
+ rcu_read_unlock();
+
+ spin_lock(&file->table_lock);
+ idr_for_each_entry(&file->object_idr, gobj, id) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+
+ amdgpu_bo_print_info(id, bo, m);
+ }
+ spin_unlock(&file->table_lock);
+ }
+
+ mutex_unlock(&dev->filelist_mutex);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_gem_info);
+
+#endif
+
+void amdgpu_debugfs_gem_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_gem_info", 0444, root, adev,
+ &amdgpu_debugfs_gem_info_fops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
new file mode 100644
index 000000000000..b558336bc4c6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_GEM_H__
+#define __AMDGPU_GEM_H__
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+
+/*
+ * GEM.
+ */
+
+#define AMDGPU_GEM_DOMAIN_MAX 0x3
+#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)
+
+extern const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
+
+unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
+
+/*
+ * GEM objects.
+ */
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
+int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
+ int alignment, u32 initial_domain,
+ u64 flags, enum ttm_bo_type type,
+ struct dma_resv *resv,
+ struct drm_gem_object **obj, int8_t xcp_id_plus1);
+int amdgpu_mode_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+int amdgpu_mode_dumb_mmap(struct drm_file *filp,
+ struct drm_device *dev,
+ uint32_t handle, uint64_t *offset_p);
+
+int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_list_handles_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#define AMDGPU_GEM_CREATE_SETTABLE_MASK (AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | \
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS | \
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC | \
+ AMDGPU_GEM_CREATE_VRAM_CLEARED | \
+ AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | \
+ AMDGPU_GEM_CREATE_EXPLICIT_SYNC | \
+ AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE | \
+ AMDGPU_GEM_CREATE_ENCRYPTED | \
+ AMDGPU_GEM_CREATE_GFX12_DCC | \
+ AMDGPU_GEM_CREATE_DISCARDABLE | \
+ AMDGPU_GEM_CREATE_COHERENT | \
+ AMDGPU_GEM_CREATE_UNCACHED | \
+ AMDGPU_GEM_CREATE_EXT_COHERENT)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
new file mode 100644
index 000000000000..8b118c53f351
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -0,0 +1,2557 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_rlc.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_mes.h"
+#include "nvd.h"
+
+/* delay 0.1 second to enable gfx off feature */
+#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
+
+#define GFX_OFF_NO_DELAY 0
+
+/*
+ * GPU GFX IP block helper functions.
+ */
+
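+/*
+ * Map a (mec, pipe, queue) triple to a linear bit index:
+ * bit = (mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue.
+ */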
+int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
+ int pipe, int queue)
+{
+ int bit = 0;
+
+ bit += mec * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+ bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+ bit += queue;
+
+ return bit;
+}
+
+void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+ int *mec, int *pipe, int *queue)
+{
+ *queue = bit % adev->gfx.mec.num_queue_per_pipe;
+ *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+}
+
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
+ int xcc_id, int mec, int pipe, int queue)
+{
+ return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
+ adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
+}
+
+static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
+{
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+ int bit = 0;
+
+ bit += me * adev->gfx.me.num_pipe_per_me
+ * num_queue_per_pipe;
+ bit += pipe * num_queue_per_pipe;
+ bit += queue;
+
+ return bit;
+}
+
+bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
+{
+ return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
+ adev->gfx.me.queue_bitmap);
+}
+
+/**
+ * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
+ *
+ * @mask: array in which the per-shader array disable masks will be stored
+ * @max_se: number of SEs
+ * @max_sh: number of SHs
+ *
+ * The bitmask of CUs to be disabled in the shader array determined by se and
+ * sh is stored in mask[se * max_sh + sh].
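+ *
+ * The module parameter is a comma-separated list of "se.sh.cu" triples,
+ * e.g. "0.0.3,1.0.7".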
+ */
+void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
+{
+ unsigned int se, sh, cu;
+ const char *p;
+
+ memset(mask, 0, sizeof(*mask) * max_se * max_sh);
+
+ if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
+ return;
+
+ p = amdgpu_disable_cu;
+ for (;;) {
+ char *next;
+ int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+
+ if (ret < 3) {
+ DRM_ERROR("amdgpu: could not parse disable_cu\n");
+ return;
+ }
+
+ if (se < max_se && sh < max_sh && cu < 16) {
+ DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
+ mask[se * max_sh + sh] |= 1u << cu;
+ } else {
+ DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
+ se, sh, cu);
+ }
+
+ next = strchr(p, ',');
+ if (!next)
+ break;
+ p = next + 1;
+ }
+}
+
+static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
+{
+ return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
+}
+
+static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
+{
+ if (amdgpu_compute_multipipe != -1) {
+ dev_info(adev->dev, "amdgpu: forcing compute pipe policy %d\n",
+ amdgpu_compute_multipipe);
+ return amdgpu_compute_multipipe == 1;
+ }
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
+ return true;
+
+ /* FIXME: spreading the queues across pipes causes perf regressions
+ * on POLARIS11 compute workloads */
+ if (adev->asic_type == CHIP_POLARIS11)
+ return false;
+
+ return adev->gfx.mec.num_mec > 1;
+}
+
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ int queue = ring->queue;
+ int pipe = ring->pipe;
+
+ /* Policy: use pipe1 queue0 as high priority graphics queue if we
+ * have more than one gfx pipe.
+ */
+ if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
+ adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
+ int me = ring->me;
+ int bit;
+
+ bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
+ if (ring == &adev->gfx.gfx_ring[bit])
+ return true;
+ }
+
+ return false;
+}
+
+bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ /* Policy: use 1st queue as high priority compute queue if we
+ * have more than one compute queue.
+ */
+ if (adev->gfx.num_compute_rings > 1 &&
+ ring == &adev->gfx.compute_ring[0])
+ return true;
+
+ return false;
+}
+
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, j, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
+ int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe,
+ adev->gfx.num_compute_rings);
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+
+ if (multipipe_policy) {
+ /* policy: make queues evenly cross all pipes on MEC1 only;
+ * for multiple XCCs, just use the original policy for simplicity */
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; i++) {
+ pipe = i % adev->gfx.mec.num_pipe_per_mec;
+ queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+ adev->gfx.mec.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+ adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
+ }
+ } else {
+ /* policy: amdgpu owns all queues in the given pipe */
+ for (j = 0; j < num_xcc; j++) {
+ for (i = 0; i < max_queues_per_mec; ++i)
+ set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
+ }
+ }
+
+ for (j = 0; j < num_xcc; j++) {
+ dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
+ bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
+ }
+}
+
+void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+ int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe;
+
+ if (multipipe_policy) {
+ /* policy: amdgpu owns the first queue per pipe at this stage;
+ * will extend to multiple queues per pipe later */
+ for (i = 0; i < max_queues_per_me; i++) {
+ pipe = i % adev->gfx.me.num_pipe_per_me;
+ queue = (i / adev->gfx.me.num_pipe_per_me) %
+ num_queue_per_pipe;
+
+ set_bit(pipe * num_queue_per_pipe + queue,
+ adev->gfx.me.queue_bitmap);
+ }
+ } else {
+ for (i = 0; i < max_queues_per_me; ++i)
+ set_bit(i, adev->gfx.me.queue_bitmap);
+ }
+
+ /* update the number of active graphics rings */
+ if (adev->gfx.num_gfx_rings)
+ adev->gfx.num_gfx_rings =
+ bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+}
+
+static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring, int xcc_id)
+{
+ int queue_bit;
+ int mec, pipe, queue;
+
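+ /*
+ * Search downward from the last possible compute queue for one that is
+ * not already claimed in the compute queue bitmap.
+ */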
+ queue_bit = adev->gfx.mec.num_mec
+ * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+
+ while (--queue_bit >= 0) {
+ if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
+ continue;
+
+ amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
+
+ /*
+ * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
+ * 2. Queue id 0 must be used, because CGPG_IDLE/SAVE/LOAD/RUN
+ * can only be issued on queue 0.
+ */
+ if ((mec == 1 && pipe > 1) || queue != 0)
+ continue;
+
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ return 0;
+ }
+
+ dev_err(adev->dev, "Failed to find a queue for KIQ\n");
+ return -EINVAL;
+}
+
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_irq_src *irq = &kiq->irq;
+ struct amdgpu_ring *ring = &kiq->ring;
+ int r = 0;
+
+ spin_lock_init(&kiq->ring_lock);
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->xcc_id = xcc_id;
+ ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+ ring->doorbell_index =
+ (adev->doorbell_index.kiq +
+ xcc_id * adev->doorbell_index.xcc_doorbell_range)
+ << 1;
+
+ r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
+ if (r)
+ return r;
+
+ ring->eop_gpu_addr = kiq->eop_gpu_addr;
+ ring->no_scheduler = true;
+ snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
+ (unsigned char)xcc_id, (unsigned char)ring->me,
+ (unsigned char)ring->pipe, (unsigned char)ring->queue);
+ r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+
+ return r;
+}
+
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_fini(ring);
+}
+
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+
+ amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
+}
+
+int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
+ unsigned int hpd_size, int xcc_id)
+{
+ int r;
+ u32 *hpd;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+
+ r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
+ &kiq->eop_gpu_addr, (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
+ return r;
+ }
+
+ memset(hpd, 0, hpd_size);
+
+ r = amdgpu_bo_reserve(kiq->eop_obj, true);
+ if (unlikely(r != 0))
+ dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
+ amdgpu_bo_kunmap(kiq->eop_obj);
+ amdgpu_bo_unreserve(kiq->eop_obj);
+
+ return 0;
+}
+
+/* create MQD for each compute/gfx queue */
+int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
+ unsigned int mqd_size, int xcc_id)
+{
+ int r, i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *ring = &kiq->ring;
+ u32 domain = AMDGPU_GEM_DOMAIN_GTT;
+
+#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
+ /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
+ domain |= AMDGPU_GEM_DOMAIN_VRAM;
+#endif
+
+ /* create MQD for KIQ */
+ if (!adev->enable_mes_kiq && !ring->mqd_obj) {
+ /* Originally the KIQ MQD was put in the GTT domain, but for SRIOV the
+ * VRAM domain is a must, otherwise the hypervisor triggers a SAVE_VF
+ * failure after the driver is unloaded (the MQD is deallocated and
+ * gart_unbind is done). To avoid diverging behavior, use the VRAM domain
+ * for the KIQ MQD on both SRIOV and bare-metal.
+ */
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+ return r;
+ }
+
+ /* prepare MQD backup */
+ kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
+ if (!kiq->mqd_backup) {
+ dev_warn(adev->dev,
+ "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
+ }
+
+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+ /* create MQD for each KGQ */
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ domain, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ ring->mqd_size = mqd_size;
+ /* prepare MQD backup */
+ adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.me.mqd_backup[i]) {
+ dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
+ }
+ }
+ }
+
+ /* create MQD for each KCQ */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ domain, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ ring->mqd_size = mqd_size;
+ /* prepare MQD backup */
+ adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.mec.mqd_backup[j]) {
+ dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_ring *ring = NULL;
+ int i, j;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+
+ if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ kfree(adev->gfx.me.mqd_backup[i]);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+ }
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ ring = &adev->gfx.compute_ring[j];
+ kfree(adev->gfx.mec.mqd_backup[j]);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+ }
+
+ ring = &kiq->ring;
+ kfree(kiq->mqd_backup);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+}
+
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i, r = 0;
+ int j;
+
+ if (adev->enable_mes) {
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.compute_ring[j],
+ RESET_QUEUES, 0, 0);
+ }
+ return 0;
+ }
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ spin_lock(&kiq->ring_lock);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_compute_rings)) {
+ spin_unlock(&kiq->ring_lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.compute_ring[j],
+ RESET_QUEUES, 0, 0);
+ }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that the unmap queues packet submitted above has been
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+
+ spin_unlock(&kiq->ring_lock);
+
+ return r;
+}
+
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i, r = 0;
+ int j;
+
+ if (adev->enable_mes) {
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ amdgpu_mes_unmap_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ }
+ return 0;
+ }
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ spin_lock(&kiq->ring_lock);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_gfx_rings)) {
+ spin_unlock(&kiq->ring_lock);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_unmap_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j],
+ PREEMPT_QUEUES, 0, 0);
+ }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+
+ /*
+ * Ring test will do a basic scratch register change check.
+ * Just run this to ensure that the unmap queues packet submitted
+ * above has been processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ }
+
+ return r;
+}
+
+int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
+ int queue_bit)
+{
+ int mec, pipe, queue;
+ int set_resource_bit = 0;
+
+ amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
+
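+ /* the set_resources queue mask uses a fixed layout of 4 pipes x 8 queues per MEC */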
+ set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
+
+ return set_resource_bit;
+}
+
+static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ uint64_t queue_mask = ~0ULL;
+ int r, i, j;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ if (!adev->enable_uni_mes) {
+ spin_lock(&kiq->ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
+ if (r) {
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ dev_err(adev->dev, "KIQ failed to set resources\n");
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ r = amdgpu_mes_map_legacy_queue(adev,
+ &adev->gfx.compute_ring[j]);
+ if (r) {
+ dev_err(adev->dev, "failed to map compute queue\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ uint64_t queue_mask = 0;
+ int r, i, j;
+
+ if (adev->mes.enable_legacy_queue_map)
+ return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
+ return -EINVAL;
+
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
+ continue;
+
+ /* This situation may be hit in the future if a new HW
+ * generation exposes more than 64 queues. If so, the
+ * definition of queue_mask needs updating */
+ if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+ dev_err(adev->dev, "Invalid KCQ enabled: %d\n", i);
+ break;
+ }
+
+ queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
+ }
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ dev_info(adev->dev, "kiq ring mec %d pipe %d q %d\n", kiq_ring->me,
+ kiq_ring->pipe, kiq_ring->queue);
+
+ spin_lock(&kiq->ring_lock);
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_compute_rings +
+ kiq->pmf->set_resources_size);
+ if (r) {
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_compute_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.compute_ring[j]);
+ }
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that the map queues packet submitted above has been
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ dev_err(adev->dev, "KCQ enable failed\n");
+
+ return r;
+}
+
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int r, i, j;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ if (adev->mes.enable_legacy_queue_map) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ r = amdgpu_mes_map_legacy_queue(adev,
+ &adev->gfx.gfx_ring[j]);
+ if (r) {
+ dev_err(adev->dev, "failed to map gfx queue\n");
+ return r;
+ }
+ }
+
+ return 0;
+ }
+
+ spin_lock(&kiq->ring_lock);
+ /* No need to map kgq on the slave */
+ if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_gfx_rings);
+ if (r) {
+ dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&kiq->ring_lock);
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ j = i + xcc_id * adev->gfx.num_gfx_rings;
+ kiq->pmf->kiq_map_queues(kiq_ring,
+ &adev->gfx.gfx_ring[j]);
+ }
+ }
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that the map queues packet submitted above has been
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&kiq->ring_lock);
+ if (r)
+ dev_err(adev->dev, "KGQ enable failed\n");
+
+ return r;
+}
+
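+/*
+ * gfx_off_req_count counts outstanding "keep GFX on" requests: a disable
+ * call increments it, an enable call decrements it, and GFXOFF is only
+ * (re)allowed once the count drops back to zero.
+ */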
+static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
+ bool no_delay)
+{
+ unsigned long delay = GFX_OFF_DELAY_ENABLE;
+
+ if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
+ return;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+ goto unlock;
+
+ adev->gfx.gfx_off_req_count--;
+
+ if (adev->gfx.gfx_off_req_count == 0 &&
+ !adev->gfx.gfx_off_state) {
+ /* If going to s2idle, no need to wait */
+ if (no_delay) {
+ if (!amdgpu_dpm_set_powergating_by_smu(adev,
+ AMD_IP_BLOCK_TYPE_GFX, true, 0))
+ adev->gfx.gfx_off_state = true;
+ } else {
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ delay);
+ }
+ }
+ } else {
+ if (adev->gfx.gfx_off_req_count == 0) {
+ cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+ if (adev->gfx.gfx_off_state &&
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
+ adev->gfx.gfx_off_state = false;
+
+ if (adev->gfx.funcs->init_spm_golden) {
+ dev_dbg(adev->dev,
+ "GFXOFF is disabled, re-init SPM golden settings\n");
+ amdgpu_gfx_init_spm_golden(adev);
+ }
+ }
+ }
+
+ adev->gfx.gfx_off_req_count++;
+ }
+
+unlock:
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+}
+
+/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true: enable gfx off feature, false: disable gfx off feature
+ *
+ * 1. The gfx off feature will be enabled by the gfx IP after gfx cg/pg is enabled.
+ * 2. Other clients can send requests to disable the gfx off feature; such requests should be honored.
+ * 3. Other clients can cancel their request to disable the gfx off feature.
+ * 4. Other clients should not request enabling the gfx off feature before disabling it.
+ *
+ * gfx off allow will be delayed by GFX_OFF_DELAY_ENABLE ms.
+ */
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+{
+ /* If going to s2idle, no need to wait */
+ bool no_delay = adev->in_s0ix ? true : false;
+
+ amdgpu_gfx_do_off_ctrl(adev, enable, no_delay);
+}
+
+/* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true: enable gfx off feature, false: disable gfx off feature
+ *
+ * 1. The gfx off feature will be enabled by the gfx IP after gfx cg/pg is enabled.
+ * 2. Other clients can send requests to disable the gfx off feature; such requests should be honored.
+ * 3. Other clients can cancel their request to disable the gfx off feature.
+ * 4. Other clients should not request enabling the gfx off feature before disabling it.
+ *
+ * gfx off allow will be issued immediately.
+ */
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_do_off_ctrl(adev, enable, true);
+}
+
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_set_residency_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_residency_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
+{
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = amdgpu_dpm_get_status_gfxoff(adev, value);
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
+}
+
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
+ r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
+ if (adev->gfx.cp_ecc_error_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ } else {
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
+ }
+
+ return 0;
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err = 0;
+ struct amdgpu_gfx_ras *ras = NULL;
+
+ /* If adev->gfx.ras is NULL, gfx does not support the RAS
+ * function, so do nothing here.
+ */
+ if (!adev->gfx.ras)
+ return 0;
+
+ ras = adev->gfx.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register gfx ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "gfx");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gfx.ras_if = &ras->ras_block.ras_comm;
+
+ /* If no special ras_late_init function is defined, use the gfx default ras_late_init */
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
+
+ /* If no special ras_cb function is defined, use the default ras_cb */
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
+
+ return 0;
+}
+
+int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
+ return adev->gfx.ras->poison_consumption_handler(adev, entry);
+
+ return 0;
+}
+
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ /* TODO: a UE will trigger an interrupt.
+ *
+ * When "Full RAS" is enabled, the per-IP interrupt sources should
+ * be disabled and the driver should only look for the aggregated
+ * interrupt via sync flood.
+ */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
+ adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
+ amdgpu_ras_reset_gpu(adev);
+ }
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->gfx.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ dev_err(adev->dev, "CP ECC ERROR IRQ\n");
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id))
+{
+ int i;
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ if (err_data) {
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+ }
+
+ for_each_inst(i, xcc_mask)
+ func(adev, ras_error_status, i);
+}
+
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq, reg_val_offs = 0, value = 0;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (adev->mes.ring[0].sched.ready)
+ return amdgpu_mes_rreg(adev, reg);
+
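+ /*
+ * Read the register via the KIQ: reserve a writeback slot, emit an
+ * rreg packet that writes the register value into that slot, then poll
+ * the fence and read the value back from the writeback buffer.
+ */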
+ BUG_ON(!ring->funcs->emit_rreg);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+ pr_err("critical bug! too many kiq readers\n");
+ goto failed_unlock;
+ }
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
+ amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* Don't wait any longer in the GPU reset case, because doing so may
+ * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which causes
+ * gpu_recover() to hang there.
+ *
+ * Also don't wait any longer in IRQ context.
+ */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_read;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_read;
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_read;
+
+ mb();
+ value = adev->wb.wb[reg_val_offs];
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ return value;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_read:
+ if (reg_val_offs)
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ dev_err(adev->dev, "failed to read reg:%x\n", reg);
+ return ~0;
+}
+
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ BUG_ON(!ring->funcs->emit_wreg);
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (adev->mes.ring[0].sched.ready) {
+ amdgpu_mes_wreg(adev, reg, v);
+ return;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
+ amdgpu_ring_emit_wreg(ring, reg, v);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* Don't wait any longer in the GPU reset case, because doing so may
+ * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which causes
+ * gpu_recover() to hang there.
+ *
+ * Also don't wait any longer in IRQ context.
+ */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_write;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_write;
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_write;
+
+ return;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_write:
+ dev_err(adev->dev, "failed to write reg:%x\n", reg);
+}
+
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (adev->enable_mes_kiq && adev->mes.ring[0].sched.ready)
+ return amdgpu_mes_hdp_flush(adev);
+
+ if (!ring->funcs->emit_hdp_flush)
+ return -EOPNOTSUPP;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_ring_alloc(ring, 32);
+ if (r)
+ goto failed_unlock;
+
+ amdgpu_ring_emit_hdp_flush(ring);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* Don't wait any longer in the GPU reset case, because doing so may
+ * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
+ * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which causes
+ * gpu_recover() to hang there.
+ *
+ * Also don't wait any longer in IRQ context.
+ */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_hdp_flush;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ if (amdgpu_in_reset(adev))
+ goto failed_kiq_hdp_flush;
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "failed to flush HDP via KIQ timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_hdp_flush:
+ dev_err(adev->dev, "failed to flush HDP via KIQ\n");
+ return r < 0 ? r : -EIO;
+}
+
+int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
+{
+ if (amdgpu_num_kcq == -1) {
+ return 8;
+ } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+ dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
+ return 8;
+ }
+ return amdgpu_num_kcq;
+}
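+
+/*
+ * Worked example (illustrative): with the default module parameter
+ * amdgpu_num_kcq == -1 the helper above returns 8 kernel compute queues;
+ * an out-of-range request such as 12 is clamped back to 8 with a warning,
+ * while a valid value like 4 is returned unchanged.
+ */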
+
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
+ uint32_t ucode_id)
+{
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct firmware *ucode_fw;
+ unsigned int fw_size;
+
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_CP_PFP:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.ce_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ default:
+ dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
+ return;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[ucode_id];
+ info->ucode_id = ucode_id;
+ info->fw = ucode_fw;
+ adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
+ }
+}
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
+{
+ return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
+ adev->gfx.num_xcc_per_xcp : 1));
+}
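+
+/*
+ * Worked example (illustrative): with adev->gfx.num_xcc_per_xcp == 2,
+ * XCC ids 0, 2, 4, ... are reported as master XCCs of their partition,
+ * while the odd ids are not. When num_xcc_per_xcp is 0, every XCC is
+ * treated as a master because the divisor falls back to 1.
+ */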
+
+static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int mode;
+
+ /* Only minimal precaution taken to reject requests while in reset.*/
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
+ mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+ AMDGPU_XCP_FL_NONE);
+
+ return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
+}
+
+static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_gfx_partition mode;
+ int ret = 0, num_xcc;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ if (num_xcc % 2 != 0)
+ return -EINVAL;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX"))) {
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
+ /*
+ * DPX mode needs the number of AIDs to be a multiple of 2.
+ * Each AID connects 2 XCCs.
+ */
+ if (num_xcc % 4)
+ return -EINVAL;
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
+ if (num_xcc != 6)
+ return -EINVAL;
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
+ if (num_xcc != 8)
+ return -EINVAL;
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ } else {
+ return -EINVAL;
+ }
+
+ /* Don't allow a switch while under reset */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EPERM;
+
+ ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+
+ up_read(&adev->reset_domain->sem);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
+static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_mgr || !xcp_mgr->avail_xcp_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
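+
+/*
+ * Illustrative userspace sketch (hypothetical path, assuming the usual sysfs
+ * layout under /sys/class/drm/cardN/device/): these show/store callbacks back
+ * the current_compute_partition and available_compute_partition attributes
+ * declared later in this file, e.g. to switch to CPX mode:
+ *
+ *   int fd = open("/sys/class/drm/card0/device/current_compute_partition",
+ *                 O_WRONLY);
+ *   if (fd >= 0) {
+ *           write(fd, "CPX", 3);
+ *           close(fd);
+ *   }
+ */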
+
+static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ struct drm_sched_entity entity;
+ static atomic_t counter;
+ struct dma_fence *f;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ void *owner;
+ int i, r;
+
+ /* Initialize the scheduler entity */
+ r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
+ goto err;
+ }
+
+ /*
+ * Use some unique dummy value as the owner to make sure we execute
+ * the cleaner shader on each submission. The value just needs to change
+ * for each submission and is otherwise meaningless.
+ */
+ owner = (void *)(unsigned long)atomic_inc_return(&counter);
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
+ 64, 0, &job,
+ AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER);
+ if (r)
+ goto err;
+
+ job->enforce_isolation = true;
+ /* always run the cleaner shader */
+ job->run_cleaner_shader = true;
+
+ ib = &job->ibs[0];
+ for (i = 0; i <= ring->funcs->align_mask; ++i)
+ ib->ptr[i] = ring->funcs->nop;
+ ib->length_dw = ring->funcs->align_mask + 1;
+
+ f = amdgpu_job_submit(job);
+
+ r = dma_fence_wait(f, false);
+ if (r)
+ goto err;
+
+ dma_fence_put(f);
+
+ /* Clean up the scheduler entity */
+ drm_sched_entity_destroy(&entity);
+ return 0;
+
+err:
+ return r;
+}
+
+static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
+{
+ int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ struct amdgpu_ring *ring;
+ int num_xcc_to_clear;
+ int i, r, xcc_id;
+
+ if (adev->gfx.num_xcc_per_xcp)
+ num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
+ else
+ num_xcc_to_clear = 1;
+
+ for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
+ if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
+ r = amdgpu_gfx_run_cleaner_shader_job(ring);
+ if (r)
+ return r;
+ num_xcc_to_clear--;
+ break;
+ }
+ }
+ }
+
+ if (num_xcc_to_clear)
+ return -ENOENT;
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * Provides the sysfs interface to manually run a cleaner shader, which is
+ * used to clear the GPU state between different tasks. Writing a value to the
+ * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
+ * The value written corresponds to the partition index on multi-partition
+ * devices. On single-partition devices, the value should be '0'.
+ *
+ * The cleaner shader clears the Local Data Store (LDS) and General Purpose
+ * Registers (GPRs) to ensure data isolation between GPU workloads.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
+static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int ret;
+ long value;
+
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+ if (adev->in_suspend && !adev->in_runpm)
+ return -EPERM;
+
+ if (adev->gfx.disable_kq)
+ return -EPERM;
+
+ ret = kstrtol(buf, 0, &value);
+
+ if (ret)
+ return -EINVAL;
+
+ if (value < 0)
+ return -EINVAL;
+
+ if (adev->xcp_mgr) {
+ if (value >= adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+ } else {
+ if (value > 1)
+ return -EINVAL;
+ }
+
+ ret = pm_runtime_get_sync(ddev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(ddev->dev);
+ return ret;
+ }
+
+ ret = amdgpu_gfx_run_cleaner_shader(adev, value);
+
+ pm_runtime_put_autosuspend(ddev->dev);
+
+ if (ret)
+ return ret;
+
+ return count;
+}
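+
+/*
+ * Illustrative userspace sketch (hypothetical path, assuming the usual sysfs
+ * layout): writing a partition index to run_cleaner_shader triggers the
+ * store handler above, e.g. for partition 0:
+ *
+ *   int fd = open("/sys/class/drm/card0/device/run_cleaner_shader", O_WRONLY);
+ *   if (fd >= 0) {
+ *           write(fd, "0", 1);
+ *           close(fd);
+ *   }
+ */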
+
+/**
+ * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer to store the output data
+ *
+ * Provides the sysfs read interface to get the current settings of the 'enforce_isolation'
+ * feature for each GPU partition. Reading from the 'enforce_isolation'
+ * sysfs file returns the isolation settings for all partitions, where '0'
+ * indicates disabled, '1' indicates enabled, '2' indicates enabled in legacy mode,
+ * and '3' indicates enabled without the cleaner shader.
+ *
+ * Return: The number of bytes read from the sysfs file.
+ */
+static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int i;
+ ssize_t size = 0;
+
+ if (adev->xcp_mgr) {
+ for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
+ size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
+ if (i < (adev->xcp_mgr->num_xcps - 1))
+ size += sysfs_emit_at(buf, size, " ");
+ }
+ buf[size++] = '\n';
+ } else {
+ size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
+ }
+
+ return size;
+}
+
+/**
+ * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * This function allows control over the 'enforce_isolation' feature, which
+ * serializes access to the graphics engine. Writing to the 'enforce_isolation'
+ * sysfs file sets the isolation mode for each partition: '0' disables
+ * isolation, '1' enables isolation with the cleaner shader, '2' enables
+ * legacy isolation without the cleaner shader, and '3' enables process
+ * isolation without submitting the cleaner shader. The input must specify
+ * the setting for all partitions.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
+static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ long partition_values[MAX_XCP] = {0};
+ int ret, i, num_partitions;
+ const char *input_buf = buf;
+
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ ret = sscanf(input_buf, "%ld", &partition_values[i]);
+ if (ret <= 0)
+ break;
+
+ /* Move the pointer to the next value in the string */
+ input_buf = strchr(input_buf, ' ');
+ if (input_buf) {
+ input_buf++;
+ } else {
+ i++;
+ break;
+ }
+ }
+ num_partitions = i;
+
+ if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+
+ if (!adev->xcp_mgr && num_partitions != 1)
+ return -EINVAL;
+
+ for (i = 0; i < num_partitions; i++) {
+ if (partition_values[i] != 0 &&
+ partition_values[i] != 1 &&
+ partition_values[i] != 2 &&
+ partition_values[i] != 3)
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < num_partitions; i++) {
+ switch (partition_values[i]) {
+ case 0:
+ default:
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ amdgpu_mes_update_enforce_isolation(adev);
+
+ return count;
+}
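+
+/*
+ * Illustrative sketch (hypothetical values): on a device partitioned into
+ * four XCPs, writing "1 1 1 1" to the enforce_isolation attribute enables
+ * isolation with the cleaner shader on every partition, while "0 0 0 0"
+ * disables it again; on a single-partition device a single value such as
+ * "1" is written instead.
+ */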
+
+static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
+}
+
+static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
+}
+
+static DEVICE_ATTR(run_cleaner_shader, 0200,
+ NULL, amdgpu_gfx_set_run_cleaner_shader);
+
+static DEVICE_ATTR(enforce_isolation, 0644,
+ amdgpu_gfx_get_enforce_isolation,
+ amdgpu_gfx_set_enforce_isolation);
+
+static DEVICE_ATTR(current_compute_partition, 0644,
+ amdgpu_gfx_get_current_compute_partition,
+ amdgpu_gfx_set_compute_partition);
+
+static DEVICE_ATTR(available_compute_partition, 0444,
+ amdgpu_gfx_get_available_compute_partition, NULL);
+static DEVICE_ATTR(gfx_reset_mask, 0444,
+ amdgpu_gfx_get_gfx_reset_mask, NULL);
+
+static DEVICE_ATTR(compute_reset_mask, 0444,
+ amdgpu_gfx_get_compute_reset_mask, NULL);
+
+static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
+ int r;
+
+ if (!xcp_mgr)
+ return 0;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
+
+ if (!xcp_switch_supported)
+ dev_attr_current_compute_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+
+ r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
+ if (r)
+ return r;
+
+ if (xcp_switch_supported)
+ r = device_create_file(adev->dev,
+ &dev_attr_available_compute_partition);
+
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
+
+ if (!xcp_mgr)
+ return;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
+ device_remove_file(adev->dev, &dev_attr_current_compute_partition);
+
+ if (xcp_switch_supported)
+ device_remove_file(adev->dev,
+ &dev_attr_available_compute_partition);
+}
+
+static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
+ if (r)
+ return r;
+ if (adev->gfx.enable_cleaner_shader)
+ r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
+
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+ if (adev->gfx.enable_cleaner_shader)
+ device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
+}
+
+static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (!amdgpu_gpu_recovery)
+ return r;
+
+ if (adev->gfx.num_gfx_rings) {
+ r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
+ if (r)
+ return r;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->gfx.num_gfx_rings)
+ device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
+
+ if (adev->gfx.num_compute_rings)
+ device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
+}
+
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_gfx_sysfs_xcp_init(adev);
+ if (r) {
+ dev_err(adev->dev, "failed to create xcp sysfs files");
+ return r;
+ }
+
+ r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
+ if (r)
+ dev_err(adev->dev, "failed to create isolation sysfs files");
+
+ r = amdgpu_gfx_sysfs_reset_mask_init(adev);
+ if (r)
+ dev_err(adev->dev, "failed to create reset mask sysfs files");
+
+ return r;
+}
+
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ amdgpu_gfx_sysfs_xcp_fini(adev);
+ amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+ amdgpu_gfx_sysfs_reset_mask_fini(adev);
+ }
+}
+
+int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return -EOPNOTSUPP;
+
+ return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.cleaner_shader_obj,
+ &adev->gfx.cleaner_shader_gpu_addr,
+ (void **)&adev->gfx.cleaner_shader_cpu_ptr);
+}
+
+void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
+ &adev->gfx.cleaner_shader_gpu_addr,
+ (void **)&adev->gfx.cleaner_shader_cpu_ptr);
+}
+
+void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size,
+ const void *cleaner_shader_ptr)
+{
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
+ memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
+ cleaner_shader_size);
+}
+
+/**
+ * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the scheduler to control
+ * @enable: Whether to enable or disable the KFD scheduler
+ *
+ * This function is used to control the KFD (Kernel Fusion Driver) scheduler
+ * from the KGD. It is part of the cleaner shader feature. This function plays
+ * a key role in enforcing process isolation on the GPU.
+ *
+ * The function uses a reference count mechanism (kfd_sch_req_count) to keep
+ * track of the number of requests to enable the KFD scheduler. When a request
+ * to enable the KFD scheduler is made, the reference count is decremented.
+ * When the reference count reaches zero, a delayed work is scheduled to
+ * enforce isolation after the remaining time slice (up to GFX_SLICE_PERIOD_MS).
+ *
+ * When a request to disable the KFD scheduler is made, the function first
+ * checks if the reference count is zero. If it is, it cancels the delayed work
+ * for enforcing isolation and checks if the KFD scheduler is active. If the
+ * KFD scheduler is active, it sends a request to stop the KFD scheduler and
+ * sets the KFD scheduler state to inactive. Then, it increments the reference
+ * count.
+ *
+ * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
+ * scheduler state and reference count are updated atomically.
+ *
+ * Note: If the reference count is already zero when a request to enable the
+ * KFD scheduler is made, it means there's an imbalance bug somewhere. The
+ * function triggers a warning in this case.
+ */
+static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
+ bool enable)
+{
+ mutex_lock(&adev->gfx.userq_sch_mutex);
+
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) {
+ dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
+ goto unlock;
+ }
+
+ adev->gfx.userq_sch_req_count[idx]--;
+
+ if (adev->gfx.userq_sch_req_count[idx] == 0 &&
+ adev->gfx.userq_sch_inactive[idx]) {
+ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
+ msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
+ }
+ } else {
+ if (adev->gfx.userq_sch_req_count[idx] == 0) {
+ cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
+ if (!adev->gfx.userq_sch_inactive[idx]) {
+ amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_stop_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = true;
+ }
+ }
+
+ adev->gfx.userq_sch_req_count[idx]++;
+ }
+
+unlock:
+ mutex_unlock(&adev->gfx.userq_sch_mutex);
+}
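+
+/*
+ * Illustrative pairing (not part of this file): kernel ring users bracket
+ * their submissions so the reference count stays balanced, as the ring
+ * begin_use/end_use helpers below do:
+ *
+ *   amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);  // kernel work starts, KFD paused
+ *   ... submit and wait for kernel jobs ...
+ *   amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);   // kernel work done, KFD may resume
+ */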
+
+/**
+ * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
+ *
+ * @work: work_struct.
+ *
+ * This function is the work handler for enforcing shader isolation on AMD GPUs.
+ * It counts the number of emitted fences for each GFX and compute ring. If there
+ * are any outstanding fences, it reschedules itself to run again after a short
+ * delay. If there are no fences, it signals the Kernel Fusion
+ * Driver (KFD) to resume the runqueue. The function is synchronized using the
+ * `enforce_isolation_mutex`.
+ */
+void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
+{
+ struct amdgpu_isolation_work *isolation_work =
+ container_of(work, struct amdgpu_isolation_work, work.work);
+ struct amdgpu_device *adev = isolation_work->adev;
+ u32 i, idx, fences = 0;
+
+ if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = isolation_work->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
+ if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+ }
+ for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
+ if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+ }
+ if (fences) {
+ /* we've already had our timeslice, so let's wrap this up */
+ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
+ msecs_to_jiffies(1));
+ } else {
+ /* Tell KFD to resume the runqueue */
+ WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]);
+ WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]);
+
+ amdgpu_userq_start_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_start_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = false;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the GPU partition
+ *
+ * When kernel submissions come in, the jobs are given a time slice and once
+ * that time slice is up, if there are KFD user queues active, kernel
+ * submissions are blocked until KFD has had its time slice. Once the KFD time
+ * slice is up, KFD user queues are preempted and kernel submissions are
+ * unblocked and allowed to run again.
+ */
+static void
+amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
+ u32 idx)
+{
+ unsigned long cjiffies;
+ bool wait = false;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ /* set the initial values if nothing is set */
+ if (!adev->gfx.enforce_isolation_jiffies[idx]) {
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ /* Make sure KFD gets a chance to run */
+ if (amdgpu_amdkfd_compute_active(adev, idx)) {
+ cjiffies = jiffies;
+ if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
+ cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
+ if (jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS) {
+ /* if our time is up, let KGD work drain before scheduling more */
+ wait = true;
+ /* reset the timer period */
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ } else {
+ /* set the timer period to what's left in our time slice */
+ adev->gfx.enforce_isolation_time[idx] =
+ GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
+ }
+ } else {
+ /* if jiffies wrap around we will just wait a little longer */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ }
+ } else {
+ /* if there is no KFD work, then set the full slice period */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (wait)
+ msleep(GFX_SLICE_PERIOD_MS);
+}
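+
+/*
+ * Worked example (illustrative, assuming a slice period of 10 ms purely for
+ * the arithmetic; the real value is GFX_SLICE_PERIOD_MS): if KFD queues are
+ * active and 7 ms of the kernel time slice have already elapsed, the next
+ * isolation work is armed for the remaining 3 ms; once the full slice has
+ * been used up, the caller sleeps for a whole slice period so KFD gets a
+ * chance to run before more kernel work is queued.
+ */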
+
+/**
+ * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring begin_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
+void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 idx;
+ bool sched_work = false;
+
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = ring->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ /* Don't submit more work until KFD has had some time */
+ amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ if (adev->kfd.init_complete)
+ sched_work = true;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring end_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
+void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 idx;
+ bool sched_work = false;
+
+ if (!adev->gfx.enable_cleaner_shader)
+ return;
+
+ if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
+ idx = 0;
+ else
+ idx = ring->xcp_id;
+
+ if (idx >= MAX_XCP)
+ return;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ if (adev->kfd.init_complete)
+ sched_work = true;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
+}
+
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, gfx.idle_work.work);
+ enum PP_SMC_POWER_PROFILE profile;
+ u32 i, fences = 0;
+ int r;
+
+ if (adev->gfx.num_gfx_rings)
+ profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ else
+ profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+ for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+ if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
+ mutex_lock(&adev->gfx.workload_profile_mutex);
+ if (adev->gfx.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, profile, false);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+ profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+ "fullscreen 3D" : "compute");
+ adev->gfx.workload_profile_active = false;
+ }
+ mutex_unlock(&adev->gfx.workload_profile_mutex);
+ } else {
+ schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+ }
+}
+
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ enum PP_SMC_POWER_PROFILE profile;
+ int r;
+
+ if (amdgpu_dpm_is_overdrive_enabled(adev))
+ return;
+
+ if (adev->gfx.num_gfx_rings)
+ profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ else
+ profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+ atomic_inc(&adev->gfx.total_submission_cnt);
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ /* We can safely return early here because we've cancelled the
+ * delayed work, so there is no one else to set it to false
+ * and we don't care if someone else sets it to true.
+ */
+ if (adev->gfx.workload_profile_active)
+ return;
+
+ mutex_lock(&adev->gfx.workload_profile_mutex);
+ if (!adev->gfx.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, profile, true);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+ profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+ "fullscreen 3D" : "compute");
+ adev->gfx.workload_profile_active = true;
+ }
+ mutex_unlock(&adev->gfx.workload_profile_mutex);
+}
+
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (amdgpu_dpm_is_overdrive_enabled(adev))
+ return;
+
+ atomic_dec(&ring->adev->gfx.total_submission_cnt);
+
+ schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+}
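+
+/*
+ * Illustrative pairing (not part of this file): callers bracket their
+ * submissions with these two helpers so the power profile is raised for the
+ * duration of a job and dropped again once the rings have been idle for
+ * GFX_PROFILE_IDLE_TIMEOUT, e.g.
+ *
+ *   amdgpu_gfx_profile_ring_begin_use(ring);
+ *   ... emit and submit the job ...
+ *   amdgpu_gfx_profile_ring_end_use(ring);
+ */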
+
+/**
+ * amdgpu_gfx_csb_preamble_start - Set CSB preamble start
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble setup.
+ *
+ * Return:
+ * The next free index in @buffer.
+ */
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer)
+{
+ u32 count = 0;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_data_parser - Parse clear-state (CS) data into the CSB
+ *
+ * @adev: amdgpu_device pointer used to get the CS data and other gfx info.
+ * @buffer: Output buffer that receives the SET_CONTEXT_REG packets.
+ * @count: Index in @buffer at which to start writing.
+ *
+ * Return:
+ * The next free index in @buffer.
+ */
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count)
+{
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ u32 i;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ }
+ }
+ }
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_end - Set CSB preamble end
+ *
+ * @buffer: Output buffer that gets the PACKET3 preamble end.
+ * @count: Index in @buffer at which to write the preamble end.
+ */
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count)
+{
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
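+
+/*
+ * Illustrative sketch (hypothetical caller): a gfx IP block can assemble its
+ * clear-state buffer by chaining the three helpers above, assuming 'buffer'
+ * is large enough for the CS data:
+ *
+ *   u32 count = amdgpu_gfx_csb_preamble_start(buffer);
+ *   count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
+ *   amdgpu_gfx_csb_preamble_end(buffer, count);
+ */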
+
+/*
+ * debugfs interface to enable/disable gfx job submission to specific rings.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (val & (1 << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
+ amdgpu_debugfs_gfx_sched_mask_get,
+ amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (adev->gfx.num_gfx_rings < 2)
+ return;
+ sprintf(name, "amdgpu_gfx_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_gfx_sched_mask_fops);
+#endif
+}
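+
+/*
+ * Illustrative usage (assuming debugfs is mounted at the usual
+ * /sys/kernel/debug): writing a bitmask such as 0x1 to
+ * dri/<minor>/amdgpu_gfx_sched_mask restricts gfx job submission to ring 0,
+ * and reading the file back reports which gfx rings currently accept jobs.
+ */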
+
+/*
+ * debugfs interface to enable/disable compute job submission to specific rings.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->gfx.num_compute_rings) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+ ring = &adev->gfx.compute_ring[i];
+ if (val & (1 << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
+ amdgpu_debugfs_compute_sched_mask_get,
+ amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (adev->gfx.num_compute_rings < 2)
+ return;
+ sprintf(name, "amdgpu_compute_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_compute_sched_mask_fops);
+#endif
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
new file mode 100644
index 000000000000..efd61a1ccc66
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -0,0 +1,671 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_GFX_H__
+#define __AMDGPU_GFX_H__
+
+/*
+ * GFX stuff
+ */
+#include "clearstate_defs.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_rlc.h"
+#include "amdgpu_imu.h"
+#include "soc15.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_xcp.h"
+
+/* GFX current status */
+#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
+#define AMDGPU_GFX_SAFE_MODE 0x00000001L
+#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L
+#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
+#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
+
+#define AMDGPU_MAX_GC_INSTANCES 8
+#define AMDGPU_MAX_QUEUES 128
+
+#define AMDGPU_MAX_GFX_QUEUES AMDGPU_MAX_QUEUES
+#define AMDGPU_MAX_COMPUTE_QUEUES AMDGPU_MAX_QUEUES
+
+enum amdgpu_gfx_pipe_priority {
+ AMDGPU_GFX_PIPE_PRIO_NORMAL = AMDGPU_RING_PRIO_1,
+ AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2
+};
+
+#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
+#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
+
+/* 1 second timeout */
+#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+enum amdgpu_gfx_partition {
+ AMDGPU_SPX_PARTITION_MODE = 0,
+ AMDGPU_DPX_PARTITION_MODE = 1,
+ AMDGPU_TPX_PARTITION_MODE = 2,
+ AMDGPU_QPX_PARTITION_MODE = 3,
+ AMDGPU_CPX_PARTITION_MODE = 4,
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1,
+ /* Automatically choose the right mode */
+ AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2,
+};
+
+#define NUM_XCC(x) hweight16(x)
+
+enum amdgpu_gfx_ras_mem_id_type {
+ AMDGPU_GFX_CP_MEM = 0,
+ AMDGPU_GFX_GCEA_MEM,
+ AMDGPU_GFX_GC_CANE_MEM,
+ AMDGPU_GFX_GCUTCL2_MEM,
+ AMDGPU_GFX_GDS_MEM,
+ AMDGPU_GFX_LDS_MEM,
+ AMDGPU_GFX_RLC_MEM,
+ AMDGPU_GFX_SP_MEM,
+ AMDGPU_GFX_SPI_MEM,
+ AMDGPU_GFX_SQC_MEM,
+ AMDGPU_GFX_SQ_MEM,
+ AMDGPU_GFX_TA_MEM,
+ AMDGPU_GFX_TCC_MEM,
+ AMDGPU_GFX_TCA_MEM,
+ AMDGPU_GFX_TCI_MEM,
+ AMDGPU_GFX_TCP_MEM,
+ AMDGPU_GFX_TD_MEM,
+ AMDGPU_GFX_TCX_MEM,
+ AMDGPU_GFX_ATC_L2_MEM,
+ AMDGPU_GFX_UTCL2_MEM,
+ AMDGPU_GFX_VML2_MEM,
+ AMDGPU_GFX_VML2_WALKER_MEM,
+ AMDGPU_GFX_MEM_TYPE_NUM
+};
+
+struct amdgpu_mec {
+ struct amdgpu_bo *hpd_eop_obj;
+ u64 hpd_eop_gpu_addr;
+ struct amdgpu_bo *mec_fw_obj;
+ u64 mec_fw_gpu_addr;
+ struct amdgpu_bo *mec_fw_data_obj;
+ u64 mec_fw_data_gpu_addr;
+
+ u32 num_mec;
+ u32 num_pipe_per_mec;
+ u32 num_queue_per_pipe;
+ void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
+};
+
+struct amdgpu_mec_bitmap {
+ /* These are the resources for which amdgpu takes ownership */
+ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+};
+
+enum amdgpu_unmap_queues_action {
+ PREEMPT_QUEUES = 0,
+ RESET_QUEUES,
+ DISABLE_PROCESS_QUEUES,
+ PREEMPT_QUEUES_NO_UNMAP,
+};
+
+struct kiq_pm4_funcs {
+ /* Support ASIC-specific kiq pm4 packets */
+ void (*kiq_set_resources)(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask);
+ void (*kiq_map_queues)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring);
+ void (*kiq_unmap_queues)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+ void (*kiq_query_status)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq);
+ void (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub);
+ void (*kiq_reset_hw_queue)(struct amdgpu_ring *kiq_ring,
+ uint32_t queue_type, uint32_t me_id,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid);
+ /* Packet sizes */
+ int set_resources_size;
+ int map_queues_size;
+ int unmap_queues_size;
+ int query_status_size;
+ int invalidate_tlbs_size;
+};
+
+struct amdgpu_kiq {
+ u64 eop_gpu_addr;
+ struct amdgpu_bo *eop_obj;
+ spinlock_t ring_lock;
+ struct amdgpu_ring ring;
+ struct amdgpu_irq_src irq;
+ const struct kiq_pm4_funcs *pmf;
+ void *mqd_backup;
+};
+
+/*
+ * GFX configurations
+ */
+#define AMDGPU_GFX_MAX_SE 4
+#define AMDGPU_GFX_MAX_SH_PER_SE 2
+
+/**
+ * amdgpu_rb_config - Configure a single Render Backend (RB)
+ *
+ * Bad RBs are fused off. The driver reads a harvest register to determine
+ * which RB(s) are fused off and configures the hardware state so that
+ * nothing gets sent to them. There are also user
+ * harvest registers that the driver can program to disable additional RBs,
+ * etc., for testing purposes.
+ */
+struct amdgpu_rb_config {
+ /**
+ * @rb_backend_disable:
+ *
+ * The value captured from register RB_BACKEND_DISABLE indicates if the
+ * RB backend is disabled or not.
+ */
+ uint32_t rb_backend_disable;
+
+ /**
+ * @user_rb_backend_disable:
+ *
+ * The value captured from register USER_RB_BACKEND_DISABLE indicates
+ * if the User RB backend is disabled or not.
+ */
+ uint32_t user_rb_backend_disable;
+
+ /**
+ * @raster_config:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the first register.
+ */
+ uint32_t raster_config;
+
+ /**
+ * @raster_config_1:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the second register.
+ */
+ uint32_t raster_config_1;
+};
+
+struct gb_addr_config {
+ uint16_t pipe_interleave_size;
+ uint8_t num_pipes;
+ uint8_t max_compress_frags;
+ uint8_t num_banks;
+ uint8_t num_se;
+ uint8_t num_rb_per_se;
+ uint8_t num_pkrs;
+};
+
+struct amdgpu_gfx_config {
+ unsigned max_shader_engines;
+ unsigned max_tile_pipes;
+ unsigned max_cu_per_sh;
+ unsigned max_sh_per_se;
+ unsigned max_backends_per_se;
+ unsigned max_texture_channel_caches;
+ unsigned max_gprs;
+ unsigned max_gs_threads;
+ unsigned max_hw_contexts;
+ unsigned sc_prim_fifo_size_frontend;
+ unsigned sc_prim_fifo_size_backend;
+ unsigned sc_hiz_tile_fifo_size;
+ unsigned sc_earlyz_tile_fifo_size;
+
+ unsigned num_tile_pipes;
+ unsigned backend_enable_mask;
+ unsigned mem_max_burst_length_bytes;
+ unsigned mem_row_size_in_kb;
+ unsigned shader_engine_tile_size;
+ unsigned num_gpus;
+ unsigned multi_gpu_tile_size;
+ unsigned mc_arb_ramcfg;
+ unsigned num_banks;
+ unsigned num_ranks;
+ unsigned gb_addr_config;
+ unsigned num_rbs;
+ unsigned gs_vgt_table_depth;
+ unsigned gs_prim_buffer_depth;
+
+ uint32_t tile_mode_array[32];
+ uint32_t macrotile_mode_array[16];
+
+ struct gb_addr_config gb_addr_config_fields;
+
+ /**
+ * @rb_config:
+ *
+ * Matrix that keeps all the Render Backend (color and depth buffer
+ * handling) configuration on the 3D engine.
+ */
+ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
+
+ /* gfx configure feature */
+ uint32_t double_offchip_lds_buf;
+ /* cached value of DB_DEBUG2 */
+ uint32_t db_debug2;
+ /* gfx10 specific config */
+ uint32_t num_sc_per_sh;
+ uint32_t num_packer_per_sc;
+ uint32_t pa_sc_tile_steering_override;
+ /* Whether texture coordinate truncation is conformant. */
+ bool ta_cntl2_truncate_coord_mode;
+ uint64_t tcc_disabled_mask;
+ uint32_t gc_num_tcp_per_sa;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_tcps;
+ uint32_t gc_num_tcp_per_wpg;
+ uint32_t gc_tcp_l1_size;
+ uint32_t gc_num_sqc_per_wgp;
+ uint32_t gc_l1_instruction_cache_size_per_sqc;
+ uint32_t gc_l1_data_cache_size_per_sqc;
+ uint32_t gc_gl1c_per_sa;
+ uint32_t gc_gl1c_size_per_instance;
+ uint32_t gc_gl2c_per_gpu;
+ uint32_t gc_tcp_size_per_cu;
+ uint32_t gc_num_cu_per_sqc;
+ uint32_t gc_tcc_size;
+ uint32_t gc_tcp_cache_line_size;
+ uint32_t gc_instruction_cache_size_per_sqc;
+ uint32_t gc_instruction_cache_line_size;
+ uint32_t gc_scalar_data_cache_size_per_sqc;
+ uint32_t gc_scalar_data_cache_line_size;
+ uint32_t gc_tcc_cache_line_size;
+};
+
+struct amdgpu_cu_info {
+ uint32_t simd_per_cu;
+ uint32_t max_waves_per_simd;
+ uint32_t wave_front_size;
+ uint32_t max_scratch_slots_per_cu;
+ uint32_t lds_size;
+
+ /* total active CU number */
+ uint32_t number;
+ uint32_t ao_cu_mask;
+ uint32_t ao_cu_bitmap[4][4];
+ uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
+};
+
+struct amdgpu_gfx_ras {
+ struct amdgpu_ras_block_object ras_block;
+ void (*enable_watchdog_timer)(struct amdgpu_device *adev);
+ int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+ int (*poison_consumption_handler)(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+};
+
+struct amdgpu_gfx_shadow_info {
+ u32 shadow_size;
+ u32 shadow_alignment;
+ u32 csa_size;
+ u32 csa_alignment;
+};
+
+struct amdgpu_gfx_funcs {
+ /* get the gpu clock counter */
+ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+ void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+ void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t *dst, int *no_fields);
+ void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread, uint32_t start,
+ uint32_t size, uint32_t *dst);
+ void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start, uint32_t size,
+ uint32_t *dst);
+ void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
+ u32 queue, u32 vmid, u32 xcc_id);
+ void (*init_spm_golden)(struct amdgpu_device *adev);
+ void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
+ int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check);
+ enum amdgpu_gfx_partition
+ (*query_partition_mode)(struct amdgpu_device *adev);
+ int (*switch_partition_mode)(struct amdgpu_device *adev,
+ int num_xccs_per_xcp);
+ int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node);
+ int (*get_xccs_per_xcp)(struct amdgpu_device *adev);
+};
+
+struct sq_work {
+ struct work_struct work;
+ unsigned ih_data;
+};
+
+struct amdgpu_pfp {
+ struct amdgpu_bo *pfp_fw_obj;
+ uint64_t pfp_fw_gpu_addr;
+ uint32_t *pfp_fw_ptr;
+
+ struct amdgpu_bo *pfp_fw_data_obj;
+ uint64_t pfp_fw_data_gpu_addr;
+ uint32_t *pfp_fw_data_ptr;
+};
+
+struct amdgpu_ce {
+ struct amdgpu_bo *ce_fw_obj;
+ uint64_t ce_fw_gpu_addr;
+ uint32_t *ce_fw_ptr;
+};
+
+struct amdgpu_me {
+ struct amdgpu_bo *me_fw_obj;
+ uint64_t me_fw_gpu_addr;
+ uint32_t *me_fw_ptr;
+
+ struct amdgpu_bo *me_fw_data_obj;
+ uint64_t me_fw_data_gpu_addr;
+ uint32_t *me_fw_data_ptr;
+
+ uint32_t num_me;
+ uint32_t num_pipe_per_me;
+ uint32_t num_queue_per_pipe;
+ void *mqd_backup[AMDGPU_MAX_GFX_RINGS];
+
+ /* These are the resources for which amdgpu takes ownership */
+ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+};
+
+struct amdgpu_isolation_work {
+ struct amdgpu_device *adev;
+ u32 xcp_id;
+ struct delayed_work work;
+};
+
+struct amdgpu_gfx {
+ struct mutex gpu_clock_mutex;
+ struct amdgpu_gfx_config config;
+ struct amdgpu_rlc rlc;
+ struct amdgpu_pfp pfp;
+ struct amdgpu_ce ce;
+ struct amdgpu_me me;
+ struct amdgpu_mec mec;
+ struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES];
+ struct amdgpu_imu imu;
+ bool rs64_enable; /* firmware format */
+ const struct firmware *me_fw; /* ME firmware */
+ uint32_t me_fw_version;
+ const struct firmware *pfp_fw; /* PFP firmware */
+ uint32_t pfp_fw_version;
+ const struct firmware *ce_fw; /* CE firmware */
+ uint32_t ce_fw_version;
+ const struct firmware *rlc_fw; /* RLC firmware */
+ uint32_t rlc_fw_version;
+ const struct firmware *mec_fw; /* MEC firmware */
+ uint32_t mec_fw_version;
+ const struct firmware *mec2_fw; /* MEC2 firmware */
+ uint32_t mec2_fw_version;
+ const struct firmware *imu_fw; /* IMU firmware */
+ uint32_t imu_fw_version;
+ uint32_t me_feature_version;
+ uint32_t ce_feature_version;
+ uint32_t pfp_feature_version;
+ uint32_t rlc_feature_version;
+ uint32_t rlc_srlc_fw_version;
+ uint32_t rlc_srlc_feature_version;
+ uint32_t rlc_srlg_fw_version;
+ uint32_t rlc_srlg_feature_version;
+ uint32_t rlc_srls_fw_version;
+ uint32_t rlc_srls_feature_version;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
+ uint32_t mec_feature_version;
+ uint32_t mec2_feature_version;
+ bool mec_fw_write_wait;
+ bool me_fw_write_wait;
+ bool cp_fw_write_wait;
+ struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
+ unsigned num_gfx_rings;
+ struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES];
+ unsigned num_compute_rings;
+ struct amdgpu_irq_src eop_irq;
+ struct amdgpu_irq_src priv_reg_irq;
+ struct amdgpu_irq_src priv_inst_irq;
+ struct amdgpu_irq_src bad_op_irq;
+ struct amdgpu_irq_src cp_ecc_error_irq;
+ struct amdgpu_irq_src sq_irq;
+ struct amdgpu_irq_src rlc_gc_fed_irq;
+ struct sq_work sq_work;
+
+ /* gfx status */
+ uint32_t gfx_current_status;
+ /* ce ram size*/
+ unsigned ce_ram_size;
+ struct amdgpu_cu_info cu_info;
+ const struct amdgpu_gfx_funcs *funcs;
+
+ /* reset mask */
+ uint32_t grbm_soft_reset;
+ uint32_t srbm_soft_reset;
+ uint32_t gfx_supported_reset;
+ uint32_t compute_supported_reset;
+
+ /* gfx off */
+ bool gfx_off_state; /* true: enabled, false: disabled */
+ struct mutex gfx_off_mutex; /* mutex to change gfxoff state */
+ uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
+ struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */
+ uint32_t gfx_off_residency; /* last logged residency */
+ uint64_t gfx_off_entrycount; /* count of times GPU has get into GFXOFF state */
+
+ /* pipe reservation */
+ struct mutex pipe_reserve_mutex;
+ DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /*ras */
+ struct ras_common_if *ras_if;
+ struct amdgpu_gfx_ras *ras;
+
+ bool is_poweron;
+
+ struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
+ struct amdgpu_ring_mux muxer;
+
+ bool cp_gfx_shadow; /* for gfx11 */
+
+ uint16_t xcc_mask;
+ uint32_t num_xcc_per_xcp;
+ struct mutex partition_mutex;
+ bool mcbp; /* mid command buffer preemption */
+
+ /* IP reg dump */
+ uint32_t *ip_dump_core;
+ uint32_t *ip_dump_compute_queues;
+ uint32_t *ip_dump_gfx_queues;
+
+ struct mutex reset_sem_mutex;
+
+ /* cleaner shader */
+ struct amdgpu_bo *cleaner_shader_obj;
+ unsigned int cleaner_shader_size;
+ u64 cleaner_shader_gpu_addr;
+ void *cleaner_shader_cpu_ptr;
+ const void *cleaner_shader_ptr;
+ bool enable_cleaner_shader;
+ struct amdgpu_isolation_work enforce_isolation[MAX_XCP];
+ /* Mutex for synchronizing KFD scheduler operations */
+ struct mutex userq_sch_mutex;
+ u64 userq_sch_req_count[MAX_XCP];
+ bool userq_sch_inactive[MAX_XCP];
+ unsigned long enforce_isolation_jiffies[MAX_XCP];
+ unsigned long enforce_isolation_time[MAX_XCP];
+
+ atomic_t total_submission_cnt;
+ struct delayed_work idle_work;
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+
+ bool disable_kq;
+ bool disable_uq;
+};
+
+struct amdgpu_gfx_ras_reg_entry {
+ struct amdgpu_ras_err_status_reg_entry reg_entry;
+ enum amdgpu_gfx_ras_mem_id_type mem_id_type;
+ uint32_t se_num;
+};
+
+struct amdgpu_gfx_ras_mem_id_entry {
+ const struct amdgpu_ras_memory_id_entry *mem_id_ent;
+ uint32_t size;
+};
+
+#define AMDGPU_GFX_MEMID_ENT(x) {(x), ARRAY_SIZE(x)},
+
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
+#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
+#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false))
+
+/**
+ * amdgpu_gfx_create_bitmask - create a bitmask
+ *
+ * @bit_width: length of the mask
+ *
+ * Create a variable-length bit mask.
+ *
+ * Return: the bitmask with the low @bit_width bits set.
+ */
+static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
+{
+ return (u32)((1ULL << bit_width) - 1);
+}
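+
+/*
+ * Worked example (illustrative): amdgpu_gfx_create_bitmask(4) returns 0xF,
+ * i.e. a mask with the low four bits set; a bit_width of 32 yields
+ * 0xFFFFFFFF without overflowing thanks to the 64-bit intermediate.
+ */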
+
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
+ unsigned max_sh);
+
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id);
+
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring);
+
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
+ unsigned hpd_size, int xcc_id);
+
+int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
+ unsigned mqd_size, int xcc_id);
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id);
+
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
+void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
+
+int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
+ int pipe, int queue);
+void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+ int *mec, int *pipe, int *queue);
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int xcc_id,
+ int mec, int pipe, int queue);
+bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
+ int pipe, int queue);
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable);
+int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
+int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value);
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency);
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value);
+int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
+int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev);
+int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
+
+int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+
+bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
+ void *ras_error_status,
+ void (*func)(struct amdgpu_device *adev, void *ras_error_status,
+ int xcc_id));
+int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size);
+void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev);
+void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
+ unsigned int cleaner_shader_size,
+ const void *cleaner_shader_ptr);
+void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work);
+void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
+
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work);
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring);
+u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
+void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
+
+static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
+{
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return "SPX";
+ case AMDGPU_DPX_PARTITION_MODE:
+ return "DPX";
+ case AMDGPU_TPX_PARTITION_MODE:
+ return "TPX";
+ case AMDGPU_QPX_PARTITION_MODE:
+ return "QPX";
+ case AMDGPU_CPX_PARTITION_MODE:
+ return "CPX";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
new file mode 100644
index 000000000000..c7b44aeb671b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_GFXHUB_H__
+#define __AMDGPU_GFXHUB_H__
+
+struct amdgpu_gfxhub_funcs {
+ u64 (*get_fb_location)(struct amdgpu_device *adev);
+ u64 (*get_mc_fb_offset)(struct amdgpu_device *adev);
+ void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
+ int (*gart_enable)(struct amdgpu_device *adev);
+
+ void (*gart_disable)(struct amdgpu_device *adev);
+ void (*set_fault_enable_default)(struct amdgpu_device *adev, bool value);
+ void (*init)(struct amdgpu_device *adev);
+ int (*get_xgmi_info)(struct amdgpu_device *adev);
+ void (*utcl2_harvest)(struct amdgpu_device *adev);
+ void (*mode2_save_regs)(struct amdgpu_device *adev);
+ void (*mode2_restore_regs)(struct amdgpu_device *adev);
+ void (*halt)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_gfxhub {
+ const struct amdgpu_gfxhub_funcs *funcs;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
new file mode 100644
index 000000000000..869bceb0fe2c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -0,0 +1,1682 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
+
+#include "amdgpu.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_xgmi.h"
+
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+
+static const u64 four_gb = 0x100000000ULL;
+
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev)
+{
+ return adev->gmc.xgmi.connected_to_cpu || amdgpu_virt_xgmi_migrate_enabled(adev);
+}
+
+/**
+ * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate video memory for pdb0 and map it for CPU access.
+ * Returns 0 for success, error for failure.
+ */
+int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
+{
+ int r;
+ struct amdgpu_bo_param bp;
+ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
+ uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = PAGE_ALIGN((npdes + 1) * 8);
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
+ if (unlikely(r != 0))
+ goto bo_reserve_failure;
+
+ r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto bo_pin_failure;
+ r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
+ if (r)
+ goto bo_kmap_failure;
+
+ amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+ return 0;
+
+bo_kmap_failure:
+ amdgpu_bo_unpin(adev->gmc.pdb0_bo);
+bo_pin_failure:
+ amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
+bo_reserve_failure:
+ amdgpu_bo_unref(&adev->gmc.pdb0_bo);
+ return r;
+}
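+
+/*
+ * Sizing example with assumed values: for a 4-node hive with 64G per
+ * node and vmid0_page_table_block_size = 9, each PDE0 entry covers
+ * 2^(9+21) bytes = 1G, so npdes = 256 and bp.size = PAGE_ALIGN(257 * 8),
+ * i.e. with 4K pages the whole PDB0 fits in a single page.
+ */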
+
+/**
+ * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
+ *
+ * @bo: the BO to get the PDE for
+ * @level: the level in the PD hierarchy
+ * @addr: resulting addr
+ * @flags: resulting flags
+ *
+ * Get the address and flags to be used for a PDE (Page Directory Entry).
+ */
+void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_TT:
+ *addr = bo->tbo.ttm->dma_address[0];
+ break;
+ case TTM_PL_VRAM:
+ *addr = amdgpu_bo_gpu_offset(bo);
+ break;
+ default:
+ *addr = 0;
+ break;
+ }
+ *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, bo->tbo.resource);
+ amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
+}
+
+/*
+ * amdgpu_gmc_pd_addr - return the address of the root directory
+ */
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t pd_addr;
+
+ /* TODO: move that into ASIC specific code */
+ if (adev->asic_type >= CHIP_VEGA10) {
+ uint64_t flags = AMDGPU_PTE_VALID;
+
+ amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
+ pd_addr |= flags;
+ } else {
+ pd_addr = amdgpu_bo_gpu_offset(bo);
+ }
+ return pd_addr;
+}
+
+/**
+ * amdgpu_gmc_set_pte_pde - update the page tables using CPU
+ *
+ * @adev: amdgpu_device pointer
+ * @cpu_pt_addr: cpu address of the page table
+ * @gpu_page_idx: entry in the page table to update
+ * @addr: dst addr to write into pte/pde
+ * @flags: access flags
+ *
+ * Update the page tables using CPU.
+ */
+int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+ uint32_t gpu_page_idx, uint64_t addr,
+ uint64_t flags)
+{
+ void __iomem *ptr = (void *)cpu_pt_addr;
+ uint64_t value;
+
+ /*
+ * The following is for PTE only. GART does not have PDEs.
+ */
+ value = addr & 0x0000FFFFFFFFF000ULL;
+ value |= flags;
+ writeq(value, ptr + (gpu_page_idx * 8));
+
+ return 0;
+}
+
+/**
+ * amdgpu_gmc_agp_addr - return the address in the AGP address space
+ *
+ * @bo: TTM BO which needs the address, must be in GTT domain
+ *
+ * Tries to figure out how to access the BO through the AGP aperture. Returns
+ * AMDGPU_BO_INVALID_OFFSET if that is not possible.
+ */
+uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+
+ if (!bo->ttm)
+ return AMDGPU_BO_INVALID_OFFSET;
+
+ if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
+ return AMDGPU_BO_INVALID_OFFSET;
+
+ if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
+ return AMDGPU_BO_INVALID_OFFSET;
+
+ return adev->gmc.agp_start + bo->ttm->dma_address[0];
+}
+
+/**
+ * amdgpu_gmc_vram_location - try to find VRAM location
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ * @base: base address at which to put VRAM
+ *
+ * Function will try to place VRAM at the base address provided
+ * as parameter.
+ */
+void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ u64 base)
+{
+ uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20;
+ uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
+
+ mc->vram_start = base;
+ mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+ if (limit < mc->real_vram_size)
+ mc->real_vram_size = limit;
+
+ if (vis_limit && vis_limit < mc->visible_vram_size)
+ mc->visible_vram_size = vis_limit;
+
+ if (mc->real_vram_size < mc->visible_vram_size)
+ mc->visible_vram_size = mc->real_vram_size;
+
+ if (mc->xgmi.num_physical_nodes == 0) {
+ mc->fb_start = mc->vram_start;
+ mc->fb_end = mc->vram_end;
+ }
+ dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+ mc->mc_vram_size >> 20, mc->vram_start,
+ mc->vram_end, mc->real_vram_size >> 20);
+}
+
+/**
+ * amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * This function is only used when GART is used for FB translation. In
+ * that case, we use the sysvm aperture (vmid0 page tables) for both vram
+ * and gart (aka system memory) access.
+ *
+ * GPUVM (and our organization of vmid0 page tables) requires the sysvm
+ * aperture to be placed at a location aligned to 8 times the native
+ * page size. For example, if vm_context0_cntl.page_table_block_size
+ * is 12, then the native page size is 8G (2M*2^12) and sysvm should
+ * start at a 64G aligned address. For simplicity, we just put sysvm at
+ * address 0, so vram starts at address 0 and gart comes right after vram.
+ */
+void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ u64 hive_vram_start = 0;
+ u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
+ mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
+ mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
+ /* node_segment_size may not be 4GB aligned on SRIOV, so align up. */
+ mc->gart_start = ALIGN(hive_vram_end + 1, four_gb);
+ mc->gart_end = mc->gart_start + mc->gart_size - 1;
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ /* set mc->vram_start to 0 to switch the returned GPU address of
+ * amdgpu_bo_create_reserved() from FB aperture to GART aperture.
+ */
+ mc->vram_start = 0;
+ mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+ mc->visible_vram_size = min(mc->visible_vram_size, mc->real_vram_size);
+ } else {
+ mc->fb_start = hive_vram_start;
+ mc->fb_end = hive_vram_end;
+ }
+ dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+ mc->mc_vram_size >> 20, mc->vram_start,
+ mc->vram_end, mc->real_vram_size >> 20);
+ dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+ mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
+/**
+ * amdgpu_gmc_gart_location - try to find GART location
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ * @gart_placement: GART placement policy with respect to VRAM
+ *
+ * Function will try to place GART before or after VRAM.
+ * If the GART size is bigger than the space left then we adjust the
+ * GART size. Thus this function never fails.
+ */
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ enum amdgpu_gart_placement gart_placement)
+{
+ u64 size_af, size_bf;
+ /* To avoid the hole, limit the max MC address to AMDGPU_GMC_HOLE_START */
+ u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
+
+ /* VCE doesn't like it when BOs cross a 4GB segment, so align
+ * the GART base on a 4GB boundary as well.
+ */
+ size_bf = mc->fb_start;
+ size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
+
+ if (mc->gart_size > max(size_bf, size_af)) {
+ dev_warn(adev->dev, "limiting GART\n");
+ mc->gart_size = max(size_bf, size_af);
+ }
+
+ switch (gart_placement) {
+ case AMDGPU_GART_PLACEMENT_HIGH:
+ mc->gart_start = max_mc_address - mc->gart_size + 1;
+ break;
+ case AMDGPU_GART_PLACEMENT_LOW:
+ mc->gart_start = 0;
+ break;
+ case AMDGPU_GART_PLACEMENT_BEST_FIT:
+ default:
+ if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+ (size_af < mc->gart_size))
+ mc->gart_start = 0;
+ else
+ mc->gart_start = max_mc_address - mc->gart_size + 1;
+ break;
+ }
+
+ mc->gart_start &= ~(four_gb - 1);
+ mc->gart_end = mc->gart_start + mc->gart_size - 1;
+ dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+ mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
+/**
+ * amdgpu_gmc_agp_location - try to find AGP location
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * Function will try to find a place for the AGP BAR in the MC address
+ * space.
+ *
+ * AGP BAR will be assigned the largest available hole in the address space.
+ * Should be called after VRAM and GART locations are set up.
+ */
+void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ const uint64_t sixteen_gb = 1ULL << 34;
+ const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
+ u64 size_af, size_bf;
+
+ if (mc->fb_start > mc->gart_start) {
+ size_bf = (mc->fb_start & sixteen_gb_mask) -
+ ALIGN(mc->gart_end + 1, sixteen_gb);
+ size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
+ } else {
+ size_bf = mc->fb_start & sixteen_gb_mask;
+ size_af = (mc->gart_start & sixteen_gb_mask) -
+ ALIGN(mc->fb_end + 1, sixteen_gb);
+ }
+
+ if (size_bf > size_af) {
+ mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
+ mc->agp_size = size_bf;
+ } else {
+ mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
+ mc->agp_size = size_af;
+ }
+
+ mc->agp_end = mc->agp_start + mc->agp_size - 1;
+ dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
+ mc->agp_size >> 20, mc->agp_start, mc->agp_end);
+}
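+
+/*
+ * For example (layout assumed): with the FB mapped at the bottom of the
+ * MC space and GART placed high, amdgpu_gmc_agp_location() takes the
+ * else branch and assigns the AGP aperture to the 16GB-aligned hole
+ * between the end of the FB and the start of GART.
+ */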
+
+/**
+ * amdgpu_gmc_set_agp_default - Set the default AGP aperture value.
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * To disable the AGP aperture, you need to set the start to a larger
+ * value than the end. This function sets the default value which
+ * can then be overridden using amdgpu_gmc_agp_location() if you want
+ * to enable the AGP aperture on a specific chip.
+ *
+ */
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ mc->agp_start = 0xffffffffffff;
+ mc->agp_end = 0;
+ mc->agp_size = 0;
+}
+
+/**
+ * amdgpu_gmc_fault_key - get hash key from VM fault address and pasid
+ *
+ * @addr: 48 bit physical address, page aligned (36 significant bits)
+ * @pasid: 16 bit process address space identifier
+ */
+static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
+{
+ return addr << 4 | pasid;
+}
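+
+/*
+ * Since @addr is page aligned its low 12 bits are zero, so after the
+ * shift by 4 the low 16 bits of the key are free to hold the 16-bit
+ * pasid without overlapping the address bits.
+ */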
+
+/**
+ * amdgpu_gmc_filter_faults - filter VM faults
+ *
+ * @adev: amdgpu device structure
+ * @ih: interrupt ring that the fault received from
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ * @timestamp: timestamp of the fault
+ *
+ * Returns:
+ * True if the fault was filtered and should not be processed further.
+ * False if the fault is a new one and needs to be handled.
+ */
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
+ uint16_t pasid, uint64_t timestamp)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
+ struct amdgpu_gmc_fault *fault;
+ uint32_t hash;
+
+ /* Stale retry fault if timestamp goes backward */
+ if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
+ return true;
+
+ /* If we don't have space left in the ring buffer return immediately */
+ stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
+ AMDGPU_GMC_FAULT_TIMEOUT;
+ if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
+ return true;
+
+ /* Try to find the fault in the hash */
+ hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+ fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+ while (fault->timestamp >= stamp) {
+ uint64_t tmp;
+
+ if (atomic64_read(&fault->key) == key) {
+ /*
+ * if we get a fault which is already present in
+ * the fault_ring and the timestamp of
+ * the fault is after the expired timestamp,
+ * then this is a new fault that needs to be added
+ * into the fault ring.
+ */
+ if (fault->timestamp_expiry != 0 &&
+ amdgpu_ih_ts_after(fault->timestamp_expiry,
+ timestamp))
+ break;
+ else
+ return true;
+ }
+
+ tmp = fault->timestamp;
+ fault = &gmc->fault_ring[fault->next];
+
+ /* Check if the entry was reused */
+ if (fault->timestamp >= tmp)
+ break;
+ }
+
+ /* Add the fault to the ring */
+ fault = &gmc->fault_ring[gmc->last_fault];
+ atomic64_set(&fault->key, key);
+ fault->timestamp = timestamp;
+
+ /* And update the hash */
+ fault->next = gmc->fault_hash[hash].idx;
+ gmc->fault_hash[hash].idx = gmc->last_fault++;
+ return false;
+}
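+
+/*
+ * The filter above is a 256-entry ring combined with a 256-bucket hash
+ * (AMDGPU_GMC_FAULT_RING_ORDER and AMDGPU_GMC_FAULT_HASH_ORDER are both
+ * 8, see amdgpu_gmc.h); entries whose timestamp is more than
+ * AMDGPU_GMC_FAULT_TIMEOUT ticks in the past are treated as free and
+ * recycled.
+ */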
+
+/**
+ * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ *
+ * Remove the address from fault filter, then future vm fault on this address
+ * will pass to retry fault handler to recover.
+ */
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+ struct amdgpu_ih_ring *ih;
+ struct amdgpu_gmc_fault *fault;
+ uint32_t last_wptr;
+ uint64_t last_ts;
+ uint32_t hash;
+ uint64_t tmp;
+
+ if (adev->irq.retry_cam_enabled)
+ return;
+
+ ih = &adev->irq.ih1;
+ /* Get the WPTR of the last entry in IH ring */
+ last_wptr = amdgpu_ih_get_wptr(adev, ih);
+ /* Order wptr with ring data. */
+ rmb();
+ /* Get the timestamp of the last entry in the IH ring */
+ last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1);
+
+ hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+ fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+ do {
+ if (atomic64_read(&fault->key) == key) {
+ /*
+ * Update the timestamp when this fault
+ * expired.
+ */
+ fault->timestamp_expiry = last_ts;
+ break;
+ }
+
+ tmp = fault->timestamp;
+ fault = &gmc->fault_ring[fault->next];
+ } while (fault->timestamp < tmp);
+}
+
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* umc ras block */
+ r = amdgpu_umc_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* mmhub ras block */
+ r = amdgpu_mmhub_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* hdp ras block */
+ r = amdgpu_hdp_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* mca.x ras block */
+ r = amdgpu_mca_mp0_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_mca_mp1_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_mca_mpio_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ /* xgmi ras block */
+ r = amdgpu_xgmi_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
+{
+
+}
+
+ /*
+ * The latest engine allocation on gfx9/10 is:
+ * Engine 2, 3: firmware
+ * Engine 0, 1, 4~16: amdgpu ring,
+ * subject to change when ring number changes
+ * Engine 17: Gart flushes
+ */
+#define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3
+
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
+ unsigned i;
+ unsigned vmhub, inv_eng;
+ struct amdgpu_ring *shared_ring;
+
+ /* init the vm inv eng for all vmhubs */
+ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+ vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
+ /* reserve engine 5 for firmware */
+ if (adev->enable_mes)
+ vm_inv_engs[i] &= ~(1 << 5);
+ /* reserve engine 6 for uni mes */
+ if (adev->enable_uni_mes)
+ vm_inv_engs[i] &= ~(1 << 6);
+ /* reserve mmhub engine 3 for firmware */
+ if (adev->enable_umsch_mm)
+ vm_inv_engs[i] &= ~(1 << 3);
+ }
+
+ for (i = 0; i < adev->num_rings; ++i) {
+ ring = adev->rings[i];
+ vmhub = ring->vm_hub;
+
+ if (ring == &adev->mes.ring[0] ||
+ ring == &adev->mes.ring[1] ||
+ ring == &adev->umsch_mm.ring ||
+ ring == &adev->cper.ring_buf)
+ continue;
+
+ /* Skip if the ring is a shared ring */
+ if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
+ continue;
+
+ inv_eng = ffs(vm_inv_engs[vmhub]);
+ if (!inv_eng) {
+ dev_err(adev->dev, "no VM inv eng for ring %s\n",
+ ring->name);
+ return -EINVAL;
+ }
+
+ ring->vm_inv_eng = inv_eng - 1;
+ vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
+
+ dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
+ ring->name, ring->vm_inv_eng, ring->vm_hub);
+ /* SDMA has a special packet which allows it to use the same
+ * invalidation engine for all the rings in one instance.
+ * Therefore, we do not allocate a separate VM invalidation engine
+ * for SDMA page rings. Instead, they share the VM invalidation
+ * engine with the SDMA gfx ring. This change ensures efficient
+ * resource management and avoids the issue of insufficient VM
+ * invalidation engines.
+ */
+ shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
+ if (shared_ring) {
+ shared_ring->vm_inv_eng = ring->vm_inv_eng;
+ dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
+ ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
+ continue;
+ }
+ }
+
+ return 0;
+}
+
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ struct dma_fence *fence;
+ struct amdgpu_job *job;
+ int r;
+
+ if (!hub->sdma_invalidation_workaround || vmid ||
+ !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
+ !ring->sched.ready) {
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+ vmhub, 2);
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
+ vmhub, 0);
+
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
+ flush_type);
+ up_read(&adev->reset_domain->sem);
+ return;
+ }
+
+ /* The SDMA on Navi 1x has a bug which can theoretically result in memory
+ * corruption if an invalidation happens at the same time as a VA
+ * translation. Avoid this by doing the invalidation from the SDMA
+ * itself at least for GART.
+ */
+ mutex_lock(&adev->mman.gtt_window_lock);
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+ &job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB);
+ if (r)
+ goto error_alloc;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+ job->vm_needs_flush = true;
+ job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ return;
+
+error_alloc:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst)
+{
+ struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
+ unsigned int ndw;
+ int r, cnt = 0;
+ uint32_t seq;
+
+ /*
+ * A GPU reset should flush all TLBs anyway, so no need to do
+ * this while one is ongoing.
+ */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return 0;
+
+ if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ 2, all_hub,
+ inst);
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ 0, all_hub,
+ inst);
+
+ adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
+ flush_type, all_hub,
+ inst);
+ r = 0;
+ } else {
+ /* 2 dwords flush + 8 dwords fence */
+ ndw = kiq->pmf->invalidate_tlbs_size + 8;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ if (adev->gmc.flush_tlb_needs_extra_type_0)
+ ndw += kiq->pmf->invalidate_tlbs_size;
+
+ spin_lock(&adev->gfx.kiq[inst].ring_lock);
+ r = amdgpu_ring_alloc(ring, ndw);
+ if (r) {
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
+ if (adev->gmc.flush_tlb_needs_extra_type_2)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
+
+ if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
+
+ kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+ goto error_unlock_reset;
+ }
+
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq[inst].ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+ !amdgpu_reset_pending(adev->reset_domain)) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
+ dev_err(adev->dev, "timeout waiting for kiq fence\n");
+ r = -ETIME;
+ } else
+ r = 0;
+ }
+
+error_unlock_reset:
+ up_read(&adev->reset_domain->sem);
+ return r;
+}
+
+void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_inst)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
+ struct amdgpu_ring *ring = &kiq->ring;
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+
+ if (adev->mes.ring[0].sched.ready) {
+ amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
+ ref, mask);
+ return;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+ ref, mask);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for IRQ context */
+ if (r < 1 && in_interrupt())
+ goto failed_kiq;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+ !amdgpu_reset_pending(adev->reset_domain)) {
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq;
+
+ return;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq:
+ dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+}
+
+/**
+ * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
+ * @adev: amdgpu_device pointer
+ *
+ * Check and set whether the device @adev supports Trusted Memory
+ * Zones (TMZ).
+ */
+void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ /* RAVEN */
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ /* RENOIR looks like RAVEN */
+ case IP_VERSION(9, 3, 0):
+ /* GC 10.3.7 */
+ case IP_VERSION(10, 3, 7):
+ /* GC 11.0.1 */
+ case IP_VERSION(11, 0, 1):
+ if (amdgpu_tmz == 0) {
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
+ } else {
+ adev->gmc.tmz_enabled = true;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature enabled\n");
+ }
+ break;
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ /* VANGOGH */
+ case IP_VERSION(10, 3, 1):
+ /* YELLOW_CARP */
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* Don't enable it by default yet.
+ */
+ if (amdgpu_tmz < 1) {
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
+ } else {
+ adev->gmc.tmz_enabled = true;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
+ }
+ break;
+ default:
+ adev->gmc.tmz_enabled = false;
+ dev_info(adev->dev,
+ "Trusted Memory Zone (TMZ) feature not supported\n");
+ break;
+ }
+}
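+
+/*
+ * amdgpu_tmz above is the amdgpu.tmz module parameter (-1 = auto,
+ * 0 = off, 1 = on): the first group of ASICs enables TMZ unless it is
+ * explicitly set to 0, while the second group keeps it disabled unless
+ * it is explicitly set to 1.
+ */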
+
+/**
+ * amdgpu_gmc_noretry_set -- set per asic noretry defaults
+ * @adev: amdgpu_device pointer
+ *
+ * Set a per asic default for the no-retry parameter.
+ *
+ */
+void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+ bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
+ gc_ver == IP_VERSION(9, 4, 0) ||
+ gc_ver == IP_VERSION(9, 4, 1) ||
+ gc_ver == IP_VERSION(9, 4, 2) ||
+ gc_ver == IP_VERSION(9, 4, 3) ||
+ gc_ver == IP_VERSION(9, 4, 4) ||
+ gc_ver == IP_VERSION(9, 5, 0) ||
+ gc_ver >= IP_VERSION(10, 3, 0));
+
+ if (!amdgpu_sriov_xnack_support(adev))
+ gmc->noretry = 1;
+ else
+ gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
+}
+
+void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
+ bool enable)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp, reg, i;
+
+ hub = &adev->vmhub[hub_type];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + hub->ctx_distance * i;
+
+ tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
+ RREG32_SOC15_IP(GC, reg) :
+ RREG32_SOC15_IP(MMHUB, reg);
+
+ if (enable)
+ tmp |= hub->vm_cntx_cntl_vm_fault;
+ else
+ tmp &= ~hub->vm_cntx_cntl_vm_fault;
+
+ (hub_type == AMDGPU_GFXHUB(0)) ?
+ WREG32_SOC15_IP(GC, reg, tmp) :
+ WREG32_SOC15_IP(MMHUB, reg, tmp);
+ }
+}
+
+void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
+{
+ unsigned size;
+
+ /*
+ * Some ASICs need to reserve a region of video memory to avoid access
+ * from the driver.
+ */
+ adev->mman.stolen_reserved_offset = 0;
+ adev->mman.stolen_reserved_size = 0;
+
+ /*
+ * TODO:
+ * Currently there is a bug where some memory client outside
+ * of the driver writes to the first 8M of VRAM on S3 resume;
+ * this overrides GART, which by default gets placed in the first 8M,
+ * and causes VM_FAULTS once GTT is accessed.
+ * Keep the stolen memory reservation until this is solved.
+ */
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ adev->mman.keep_stolen_vga_memory = true;
+ /*
+ * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
+ */
+#ifdef CONFIG_X86
+ if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
+ adev->mman.stolen_reserved_offset = 0x500000;
+ adev->mman.stolen_reserved_size = 0x200000;
+ }
+#endif
+ break;
+ case CHIP_RAVEN:
+ case CHIP_RENOIR:
+ adev->mman.keep_stolen_vga_memory = true;
+ break;
+ default:
+ adev->mman.keep_stolen_vga_memory = false;
+ break;
+ }
+
+ if (amdgpu_sriov_vf(adev) ||
+ !amdgpu_device_has_display_hardware(adev)) {
+ size = 0;
+ } else {
+ size = amdgpu_gmc_get_vbios_fb_size(adev);
+
+ if (adev->mman.keep_stolen_vga_memory)
+ size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
+ }
+
+ /* set to 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+ size = 0;
+
+ if (size > AMDGPU_VBIOS_VGA_ALLOCATION) {
+ adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION;
+ adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size;
+ } else {
+ adev->mman.stolen_vga_size = size;
+ adev->mman.stolen_extended_size = 0;
+ }
+}
+
+/**
+ * amdgpu_gmc_init_pdb0 - initialize PDB0
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This function is only used when the GART page table is used
+ * for FB address translation. In such a case, we construct
+ * a 2-level system VM page table: PDB0->PTB, to cover both
+ * VRAM of the hive and system memory.
+ *
+ * PDB0 is static, initialized once on driver initialization.
+ * The first n entries of PDB0 are used as PTE by setting
+ * P bit to 1, pointing to VRAM. The n+1'th entry points
+ * to a big PTB covering system memory.
+ *
+ */
+void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
+{
+ int i;
+ uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
+ /* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
+ */
+ u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
+ u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
+ u64 vram_addr, vram_end;
+ u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return;
+
+ flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+ flags |= AMDGPU_PTE_WRITEABLE;
+ flags |= AMDGPU_PTE_SNOOPED;
+ flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
+ flags |= AMDGPU_PDE_PTE_FLAG(adev);
+
+ vram_addr = adev->vm_manager.vram_base_offset;
+ if (!amdgpu_virt_xgmi_migrate_enabled(adev))
+ vram_addr -= adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ vram_end = vram_addr + vram_size;
+
+ /* The first n PDE0 entries are used as PTE,
+ * pointing to vram
+ */
+ for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
+ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
+
+ /* The n+1'th PDE0 entry points to a huge
+ * PTB which has more than 512 entries, each
+ * pointing to a 4K system page
+ */
+ flags = AMDGPU_PTE_VALID;
+ flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
+ /* Requires gart_ptb_gpu_pa to be 4K aligned */
+ amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+ drm_dev_exit(idx);
+}
+
+/**
+ * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC
+ * address
+ *
+ * @adev: amdgpu_device pointer
+ * @mc_addr: MC address of buffer
+ */
+uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
+{
+ return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
+}
+
+/**
+ * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from
+ * GPU's view
+ *
+ * @adev: amdgpu_device pointer
+ * @bo: amdgpu buffer object
+ */
+uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+ return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
+}
+
+int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo *vram_bo = NULL;
+ uint64_t vram_gpu = 0;
+ void *vram_ptr = NULL;
+
+ int ret, size = 0x100000;
+ uint8_t cptr[10];
+
+ ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &vram_bo,
+ &vram_gpu,
+ &vram_ptr);
+ if (ret)
+ return ret;
+
+ memset(vram_ptr, 0x86, size);
+ memset(cptr, 0x86, 10);
+
+ /*
+ * Check the start, the middle and the end of the memory to see whether
+ * the content of each byte is the pattern 0x86. If so, we assume the
+ * vram bo is workable.
+ *
+ * Note: checking every byte of the whole 1M bo would take too many
+ * seconds, so we just sample these three parts.
+ */
+ ret = memcmp(vram_ptr, cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+ ret = memcmp(vram_ptr + (size / 2), cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+ ret = memcmp(vram_ptr + size - 10, cptr, 10);
+ if (ret) {
+ ret = -EIO;
+ goto release_buffer;
+ }
+
+release_buffer:
+ amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
+ &vram_ptr);
+
+ return ret;
+}
+
+static const char *nps_desc[] = {
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+static ssize_t available_memory_partition_show(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int size = 0, mode;
+ char *sep = "";
+
+ for_each_inst(mode, adev->gmc.supported_nps_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+ sep = ", ";
+ }
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t current_memory_partition_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+ struct amdgpu_hive_info *hive;
+ int i;
+
+ mode = UNKNOWN_MEMORY_PARTITION_MODE;
+ for_each_inst(i, adev->gmc.supported_nps_modes) {
+ if (!strncasecmp(nps_desc[i], buf, strlen(nps_desc[i]))) {
+ mode = i;
+ break;
+ }
+ }
+
+ if (mode == UNKNOWN_MEMORY_PARTITION_MODE)
+ return -EINVAL;
+
+ if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
+ dev_info(
+ adev->dev,
+ "requested NPS mode is same as current NPS mode, skipping\n");
+ return count;
+ }
+
+ /* If the device is part of a hive, all devices in the hive should
+ * request the same mode, hence store the requested mode in the hive.
+ */
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ atomic_set(&hive->requested_nps_mode, mode);
+ amdgpu_put_xgmi_hive(hive);
+ } else {
+ adev->gmc.requested_nps_mode = mode;
+ }
+
+ dev_info(
+ adev->dev,
+ "NPS mode change requested, please remove and reload the driver\n");
+
+ return count;
+}
+
+static ssize_t current_memory_partition_show(
+ struct device *dev, struct device_attribute *addr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+
+ /* Only minimal precaution taken to reject requests while in reset */
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ if ((mode >= ARRAY_SIZE(nps_desc)) ||
+ (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
+ return sysfs_emit(buf, "UNKNOWN\n");
+
+ return sysfs_emit(buf, "%s\n", nps_desc[mode]);
+}
+
+static DEVICE_ATTR_RW(current_memory_partition);
+static DEVICE_ATTR_RO(available_memory_partition);
+
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
+{
+ bool nps_switch_support;
+ int r = 0;
+
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return 0;
+
+ nps_switch_support = (hweight32(adev->gmc.supported_nps_modes &
+ AMDGPU_ALL_NPS_MASK) > 1);
+ if (!nps_switch_support)
+ dev_attr_current_memory_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+ else
+ r = device_create_file(adev->dev,
+ &dev_attr_available_memory_partition);
+
+ if (r)
+ return r;
+
+ return device_create_file(adev->dev,
+ &dev_attr_current_memory_partition);
+}
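+
+/*
+ * The attributes created above appear under the device's sysfs
+ * directory, e.g. (path assumed) /sys/bus/pci/devices/<bdf>/, and the
+ * write permission on current_memory_partition is dropped when only
+ * one NPS mode is supported.
+ */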
+
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return;
+
+ device_remove_file(adev->dev, &dev_attr_current_memory_partition);
+ device_remove_file(adev->dev, &dev_attr_available_memory_partition);
+}
+
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges,
+ uint8_t *exp_ranges)
+{
+ struct amdgpu_gmc_memrange *ranges;
+ int range_cnt, ret, i, j;
+ uint32_t nps_type;
+ bool refresh;
+
+ if (!mem_ranges || !exp_ranges)
+ return -EINVAL;
+
+ refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
+ (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
+ ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
+ &range_cnt, refresh);
+
+ if (ret)
+ return ret;
+
+ /* TODO: For now, expect ranges and partition count to be the same.
+ * Adjust if there are holes expected in any NPS domain.
+ */
+ if (*exp_ranges && (range_cnt != *exp_ranges)) {
+ dev_warn(
+ adev->dev,
+ "NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
+ *exp_ranges, nps_type, range_cnt);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ for (i = 0; i < range_cnt; ++i) {
+ if (ranges[i].base_address >= ranges[i].limit_address) {
+ dev_warn(
+ adev->dev,
+ "Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx",
+ nps_type, i, ranges[i].base_address,
+ ranges[i].limit_address);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* Check for overlaps, not expecting any now */
+ for (j = i - 1; j >= 0; j--) {
+ if (max(ranges[j].base_address,
+ ranges[i].base_address) <=
+ min(ranges[j].limit_address,
+ ranges[i].limit_address)) {
+ dev_warn(
+ adev->dev,
+ "overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]",
+ ranges[j].base_address,
+ ranges[j].limit_address,
+ ranges[i].base_address,
+ ranges[i].limit_address);
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ mem_ranges[i].range.fpfn =
+ (ranges[i].base_address -
+ adev->vm_manager.vram_base_offset) >>
+ AMDGPU_GPU_PAGE_SHIFT;
+ mem_ranges[i].range.lpfn =
+ (ranges[i].limit_address -
+ adev->vm_manager.vram_base_offset) >>
+ AMDGPU_GPU_PAGE_SHIFT;
+ mem_ranges[i].size =
+ ranges[i].limit_address - ranges[i].base_address + 1;
+ }
+
+ if (!*exp_ranges)
+ *exp_ranges = range_cnt;
+err:
+ kfree(ranges);
+
+ return ret;
+}
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode)
+{
+ /* Not supported on VF devices and APUs */
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return -EOPNOTSUPP;
+
+ if (!adev->psp.funcs) {
+ dev_err(adev->dev,
+ "PSP interface not available for nps mode change request");
+ return -EINVAL;
+ }
+
+ return psp_memory_partition(&adev->psp, nps_mode);
+}
+
+static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
+ int req_nps_mode,
+ int cur_nps_mode)
+{
+ return (((BIT(req_nps_mode) & adev->gmc.supported_nps_modes) ==
+ BIT(req_nps_mode)) &&
+ req_nps_mode != cur_nps_mode);
+}
+
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
+{
+ int req_nps_mode, cur_nps_mode, r;
+ struct amdgpu_hive_info *hive;
+
+ if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
+ !adev->gmc.gmc_funcs->request_mem_partition_mode)
+ return;
+
+ cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ req_nps_mode = atomic_read(&hive->requested_nps_mode);
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
+ cur_nps_mode)) {
+ amdgpu_put_xgmi_hive(hive);
+ return;
+ }
+ r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
+ amdgpu_put_xgmi_hive(hive);
+ goto out;
+ }
+
+ req_nps_mode = adev->gmc.requested_nps_mode;
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
+ return;
+
+ /* even if this fails, we should let driver unload w/o blocking */
+ r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
+out:
+ if (r)
+ dev_err(adev->dev, "NPS mode change request failed\n");
+ else
+ dev_info(
+ adev->dev,
+ "NPS mode change request done, reload driver to complete the change\n");
+}
+
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
+{
+ if (adev->gmc.gmc_funcs->need_reset_on_init)
+ return adev->gmc.gmc_funcs->need_reset_on_init(adev);
+
+ return false;
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev)
+{
+ switch (adev->gmc.num_mem_partitions) {
+ case 0:
+ return UNKNOWN_MEMORY_PARTITION_MODE;
+ case 1:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ case 2:
+ return AMDGPU_NPS2_PARTITION_MODE;
+ case 4:
+ return AMDGPU_NPS4_PARTITION_MODE;
+ case 8:
+ return AMDGPU_NPS8_PARTITION_MODE;
+ default:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ }
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+{
+ enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_memory_partition_mode)
+ mode = adev->nbio.funcs->get_memory_partition_mode(adev,
+ supp_modes);
+ else
+ dev_warn(adev->dev, "memory partition mode query is not supported\n");
+
+ return mode;
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return amdgpu_gmc_get_vf_memory_partition(adev);
+ else
+ return amdgpu_gmc_get_memory_partition(adev, NULL);
+}
+
+static bool amdgpu_gmc_validate_partition_info(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ u32 supp_modes;
+ bool valid;
+
+ mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware not present in supported modes */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
+ !(BIT(mode - 1) & supp_modes))
+ return false;
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 1);
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 2);
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 3 ||
+ adev->gmc.num_mem_partitions == 4);
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 8);
+ break;
+ default:
+ valid = false;
+ }
+
+ return valid;
+}
+
+static bool amdgpu_gmc_is_node_present(int *node_ids, int num_ids, int nid)
+{
+ int i;
+
+ /* Check if node with id 'nid' is present in 'node_ids' array */
+ for (i = 0; i < num_ids; ++i)
+ if (node_ids[i] == nid)
+ return true;
+
+ return false;
+}
+
+static void
+amdgpu_gmc_init_acpi_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ struct amdgpu_numa_info numa_info;
+ int node_ids[AMDGPU_MAX_MEM_RANGES];
+ int num_ranges = 0, ret;
+ int num_xcc, xcc_id;
+ uint32_t xcc_mask;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ xcc_mask = (1U << num_xcc) - 1;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+ if (ret)
+ continue;
+
+ if (numa_info.nid == NUMA_NO_NODE) {
+ mem_ranges[0].size = numa_info.size;
+ mem_ranges[0].numa.node = numa_info.nid;
+ num_ranges = 1;
+ break;
+ }
+
+ if (amdgpu_gmc_is_node_present(node_ids, num_ranges,
+ numa_info.nid))
+ continue;
+
+ node_ids[num_ranges] = numa_info.nid;
+ mem_ranges[num_ranges].numa.node = numa_info.nid;
+ mem_ranges[num_ranges].size = numa_info.size;
+ ++num_ranges;
+ }
+
+ adev->gmc.num_mem_partitions = num_ranges;
+}
+
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ enum amdgpu_memory_partition mode;
+ u32 start_addr = 0, size;
+ int i, r, l;
+
+ mode = amdgpu_gmc_query_memory_partition(adev);
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 0;
+ break;
+ case AMDGPU_NPS1_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 2;
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ if (adev->flags & AMD_IS_APU)
+ adev->gmc.num_mem_partitions = 3;
+ else
+ adev->gmc.num_mem_partitions = 4;
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 8;
+ break;
+ default:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ }
+
+ /* Use NPS range info, if populated */
+ r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
+ &adev->gmc.num_mem_partitions);
+ if (!r) {
+ l = 0;
+ for (i = 1; i < adev->gmc.num_mem_partitions; ++i) {
+ if (mem_ranges[i].range.lpfn >
+ mem_ranges[i - 1].range.lpfn)
+ l = i;
+ }
+
+ } else {
+ if (!adev->gmc.num_mem_partitions) {
+ dev_warn(adev->dev,
+ "Not able to detect NPS mode, fall back to NPS1\n");
+ adev->gmc.num_mem_partitions = 1;
+ }
+ /* Fallback to sw based calculation */
+ size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
+ size /= adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ mem_ranges[i].range.fpfn = start_addr;
+ mem_ranges[i].size =
+ ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
+ mem_ranges[i].range.lpfn = start_addr + size - 1;
+ start_addr += size;
+ }
+
+ l = adev->gmc.num_mem_partitions - 1;
+ }
+
+ /* Adjust the last one */
+ mem_ranges[l].range.lpfn =
+ (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
+ mem_ranges[l].size =
+ adev->gmc.real_vram_size -
+ ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT);
+}
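+
+/*
+ * Illustration of the software fallback path with assumed values: 64G
+ * of VRAM in NPS4 mode yields four ranges of roughly 16G each, with the
+ * last range trimmed so that it ends exactly at real_vram_size.
+ */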
+
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev)
+{
+ bool valid;
+
+ adev->gmc.mem_partitions = kcalloc(AMDGPU_MAX_MEM_RANGES,
+ sizeof(struct amdgpu_mem_partition_info),
+ GFP_KERNEL);
+ if (!adev->gmc.mem_partitions)
+ return -ENOMEM;
+
+ if (adev->gmc.is_app_apu)
+ amdgpu_gmc_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
+ else
+ amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+
+ if (amdgpu_sriov_vf(adev))
+ valid = true;
+ else
+ valid = amdgpu_gmc_validate_partition_info(adev);
+ if (!valid) {
+ /* TODO: handle invalid case */
+ dev_warn(adev->dev,
+ "Mem ranges not matching with hardware config\n");
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
new file mode 100644
index 000000000000..727342689d4b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -0,0 +1,476 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef __AMDGPU_GMC_H__
+#define __AMDGPU_GMC_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_irq.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_ras.h"
+
+/* VA hole for 48bit addresses on Vega10 */
+#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
+#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL
+
+/*
+ * Hardware is programmed as if the hole doesn't exist, using start and end
+ * address values.
+ *
+ * This mask is used to remove the upper 16 bits of the VA and so come up
+ * with the linear address value.
+ */
+#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL
+
+/*
+ * Ring size as power of two for the log of recent faults.
+ */
+#define AMDGPU_GMC_FAULT_RING_ORDER 8
+#define AMDGPU_GMC_FAULT_RING_SIZE (1 << AMDGPU_GMC_FAULT_RING_ORDER)
+
+/*
+ * Hash size as power of two for the log of recent faults
+ */
+#define AMDGPU_GMC_FAULT_HASH_ORDER 8
+#define AMDGPU_GMC_FAULT_HASH_SIZE (1 << AMDGPU_GMC_FAULT_HASH_ORDER)
+
+/*
+ * Number of IH timestamp ticks until a fault is considered handled
+ */
+#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL
+
+/* XNACK flags */
+#define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0)
+
+struct firmware;
+
+enum amdgpu_memory_partition {
+ UNKNOWN_MEMORY_PARTITION_MODE = 0,
+ AMDGPU_NPS1_PARTITION_MODE = 1,
+ AMDGPU_NPS2_PARTITION_MODE = 2,
+ AMDGPU_NPS3_PARTITION_MODE = 3,
+ AMDGPU_NPS4_PARTITION_MODE = 4,
+ AMDGPU_NPS6_PARTITION_MODE = 6,
+ AMDGPU_NPS8_PARTITION_MODE = 8,
+};
+
+#define AMDGPU_ALL_NPS_MASK \
+ (BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS2_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS3_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS6_PARTITION_MODE) | BIT(AMDGPU_NPS8_PARTITION_MODE))
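+/*
+ * Editor's sketch (illustrative): the supported modes are tracked as a
+ * bitmask (see supported_nps_modes in struct amdgpu_gmc below), so support
+ * for a given mode can be tested with e.g.
+ *     if (adev->gmc.supported_nps_modes & BIT(AMDGPU_NPS4_PARTITION_MODE))
+ *             ... NPS4 can be requested ...
+ */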
+
+#define AMDGPU_GMC_INIT_RESET_NPS BIT(0)
+
+#define AMDGPU_MAX_MEM_RANGES 8
+
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY 0x80
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_READ 0x40
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE 0x20
+#define AMDGPU_GMC9_FAULT_SOURCE_DATA_EXE 0x10
+
+/*
+ * GMC page fault information
+ */
+struct amdgpu_gmc_fault {
+ uint64_t timestamp:48;
+ uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
+ atomic64_t key;
+ uint64_t timestamp_expiry:48;
+};
+
+/*
+ * VMHUB structures, functions & helpers
+ */
+struct amdgpu_vmhub_funcs {
+ void (*print_l2_protection_fault_status)(struct amdgpu_device *adev,
+ uint32_t status);
+ uint32_t (*get_invalidate_req)(unsigned int vmid, uint32_t flush_type);
+};
+
+struct amdgpu_vmhub {
+ uint32_t ctx0_ptb_addr_lo32;
+ uint32_t ctx0_ptb_addr_hi32;
+ uint32_t vm_inv_eng0_sem;
+ uint32_t vm_inv_eng0_req;
+ uint32_t vm_inv_eng0_ack;
+ uint32_t vm_context0_cntl;
+ uint32_t vm_l2_pro_fault_status;
+ uint32_t vm_l2_pro_fault_cntl;
+
+ /*
+ * store the register distance between two consecutive context domains
+ * and between two consecutive invalidation engines.
+ */
+ uint32_t ctx_distance;
+ uint32_t ctx_addr_distance; /* include LO32/HI32 */
+ uint32_t eng_distance;
+ uint32_t eng_addr_distance; /* include LO32/HI32 */
+
+ uint32_t vm_cntx_cntl;
+ uint32_t vm_cntx_cntl_vm_fault;
+ uint32_t vm_l2_bank_select_reserved_cid2;
+
+ uint32_t vm_contexts_disable;
+
+ bool sdma_invalidation_workaround;
+
+ const struct amdgpu_vmhub_funcs *vmhub_funcs;
+};
+
+/*
+ * GPU MC structures, functions & helpers
+ */
+struct amdgpu_gmc_funcs {
+ /* flush the vm tlb via mmio */
+ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type);
+ /* flush the vm tlb via pasid */
+ void (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
+ /* flush the vm tlb via ring */
+ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
+ uint64_t pd_addr);
+ /* Change the VMID -> PASID mapping */
+ void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid);
+ /* enable/disable PRT support */
+ void (*set_prt)(struct amdgpu_device *adev, bool enable);
+ /* get the pde for a given mc addr */
+ void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+ u64 *dst, u64 *flags);
+ /* get the pte flags to use for PTEs */
+ void (*get_vm_pte)(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo,
+ uint32_t vm_flags,
+ uint64_t *pte_flags);
+ /* override per-page pte flags */
+ void (*override_vm_pte_flags)(struct amdgpu_device *dev,
+ struct amdgpu_vm *vm,
+ uint64_t addr, uint64_t *flags);
+ /* get the amount of memory used by the vbios for pre-OS console */
+ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
+ /* get the DCC buffer alignment */
+ unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev);
+
+ enum amdgpu_memory_partition (*query_mem_partition_mode)(
+ struct amdgpu_device *adev);
+ /* Request NPS mode */
+ int (*request_mem_partition_mode)(struct amdgpu_device *adev,
+ int nps_mode);
+ bool (*need_reset_on_init)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_mem_partition_info {
+ union {
+ struct {
+ uint32_t fpfn;
+ uint32_t lpfn;
+ } range;
+ struct {
+ int node;
+ } numa;
+ };
+ uint64_t size;
+};
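+/*
+ * Editor's sketch (illustrative): for a range-based partition the size in
+ * bytes follows from the page frame numbers, roughly
+ *     part->size = ((u64)(part->range.lpfn + 1 - part->range.fpfn))
+ *                     << AMDGPU_GPU_PAGE_SHIFT;
+ * assuming an inclusive lpfn; the exact convention is set by the init
+ * helpers in amdgpu_gmc.c.
+ */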
+
+#define INVALID_PFN -1
+
+struct amdgpu_gmc_memrange {
+ uint64_t base_address;
+ uint64_t limit_address;
+ uint32_t flags;
+ int nid_mask;
+};
+
+enum amdgpu_gart_placement {
+ AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
+ AMDGPU_GART_PLACEMENT_HIGH,
+ AMDGPU_GART_PLACEMENT_LOW,
+};
+
+struct amdgpu_gmc {
+ /* FB's physical address in MMIO space (for CPU to
+ * map FB). This is different from the agp/gart/
+ * vram_start/end fields, as the latter are from the
+ * GPU's view while aper_base is from the CPU's view.
+ */
+ resource_size_t aper_size;
+ resource_size_t aper_base;
+ /* for some chips with <= 32MB we need to lie
+ * about vram size near mc fb location */
+ u64 mc_vram_size;
+ u64 visible_vram_size;
+ /* AGP aperture start and end in MC address space
+ * The driver finds a hole in the MC address space
+ * to place AGP by setting the MC_VM_AGP_BOT/TOP registers.
+ * Under VMID0, logical address == MC address. The AGP
+ * aperture maps to physical bus or IOVA addresses.
+ * The AGP aperture is used to simulate FB in the ZFB case.
+ * The AGP aperture is also used for page tables in system
+ * memory (mainly for APUs).
+ *
+ */
+ u64 agp_size;
+ u64 agp_start;
+ u64 agp_end;
+ /* GART aperture start and end in MC address space
+ * The driver finds a hole in the MC address space
+ * to place GART by setting the
+ * VM_CONTEXT0_PAGE_TABLE_START/END_ADDR registers.
+ * Under VMID0, logical addresses inside the GART aperture
+ * are translated through the gpuvm GART page table to
+ * access paged system memory.
+ */
+ u64 gart_size;
+ u64 gart_start;
+ u64 gart_end;
+ /* Frame buffer aperture of this GPU device. Different from
+ * fb_start (see below), this only covers the local GPU device.
+ * If the driver uses the FB aperture to access FB, it gets fb_start from
+ * MC_VM_FB_LOCATION_BASE (set by the vbios) and calculates vram_start
+ * of this local device by adding an offset inside the XGMI hive.
+ * If the driver uses the GART table for VMID0 FB access, it finds a hole
+ * in VMID0's virtual address space to place the SYSVM aperture, inside
+ * which the first part is vram and the second part is gart (covering
+ * system ram).
+ */
+ u64 vram_start;
+ u64 vram_end;
+ /* FB region. This is the same as the local vram region on a single GPU.
+ * In an XGMI configuration, this region covers all GPUs in the same hive,
+ * and each GPU in the hive has the same view of this FB region.
+ * GPU0's vram starts at offset (0 * segment size),
+ * GPU1 starts at offset (1 * segment size), etc.
+ */
+ u64 fb_start;
+ u64 fb_end;
+ unsigned vram_width;
+ u64 real_vram_size;
+ int vram_mtrr;
+ u64 mc_mask;
+ const struct firmware *fw; /* MC firmware */
+ uint32_t fw_version;
+ struct amdgpu_irq_src vm_fault;
+ uint32_t vram_type;
+ uint8_t vram_vendor;
+ uint32_t srbm_soft_reset;
+ bool prt_warning;
+ uint32_t sdpif_register;
+ /* apertures */
+ u64 shared_aperture_start;
+ u64 shared_aperture_end;
+ u64 private_aperture_start;
+ u64 private_aperture_end;
+ /* protects concurrent invalidation */
+ spinlock_t invalidate_lock;
+ bool translate_further;
+ struct kfd_vm_fault_info *vm_fault_info;
+ atomic_t vm_fault_info_updated;
+
+ struct amdgpu_gmc_fault fault_ring[AMDGPU_GMC_FAULT_RING_SIZE];
+ struct {
+ uint64_t idx:AMDGPU_GMC_FAULT_RING_ORDER;
+ } fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE];
+ uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER;
+
+ bool tmz_enabled;
+ bool is_app_apu;
+
+ struct amdgpu_mem_partition_info *mem_partitions;
+ uint8_t num_mem_partitions;
+ const struct amdgpu_gmc_funcs *gmc_funcs;
+ enum amdgpu_memory_partition requested_nps_mode;
+ uint32_t supported_nps_modes;
+ uint32_t reset_flags;
+
+ struct amdgpu_xgmi xgmi;
+ struct amdgpu_irq_src ecc_irq;
+ int noretry;
+ uint32_t xnack_flags;
+
+ uint32_t vmid0_page_table_block_size;
+ uint32_t vmid0_page_table_depth;
+ struct amdgpu_bo *pdb0_bo;
+ /* CPU kmapped address of pdb0 */
+ void *ptr_pdb0;
+
+ /* MALL size */
+ u64 mall_size;
+ uint32_t m_half_use;
+
+ /* number of UMC instances */
+ int num_umc;
+ /* mode2 save restore */
+ u64 VM_L2_CNTL;
+ u64 VM_L2_CNTL2;
+ u64 VM_DUMMY_PAGE_FAULT_CNTL;
+ u64 VM_DUMMY_PAGE_FAULT_ADDR_LO32;
+ u64 VM_DUMMY_PAGE_FAULT_ADDR_HI32;
+ u64 VM_L2_PROTECTION_FAULT_CNTL;
+ u64 VM_L2_PROTECTION_FAULT_CNTL2;
+ u64 VM_L2_PROTECTION_FAULT_MM_CNTL3;
+ u64 VM_L2_PROTECTION_FAULT_MM_CNTL4;
+ u64 VM_L2_PROTECTION_FAULT_ADDR_LO32;
+ u64 VM_L2_PROTECTION_FAULT_ADDR_HI32;
+ u64 VM_DEBUG;
+ u64 VM_L2_MM_GROUP_RT_CLASSES;
+ u64 VM_L2_BANK_SELECT_RESERVED_CID;
+ u64 VM_L2_BANK_SELECT_RESERVED_CID2;
+ u64 VM_L2_CACHE_PARITY_CNTL;
+ u64 VM_L2_IH_LOG_CNTL;
+ u64 VM_CONTEXT_CNTL[16];
+ u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16];
+ u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16];
+ u64 MC_VM_MX_L1_TLB_CNTL;
+
+ u64 noretry_flags;
+
+ bool flush_tlb_needs_extra_type_0;
+ bool flush_tlb_needs_extra_type_2;
+ bool flush_pasid_uses_kiq;
+};
+
+#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
+#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_gmc_get_vm_pte(adev, vm, bo, vm_flags, pte_flags) \
+ ((adev)->gmc.gmc_funcs->get_vm_pte((adev), (vm), (bo), (vm_flags), \
+ (pte_flags)))
+#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
+ (adev)->gmc.gmc_funcs->override_vm_pte_flags \
+ ((adev), (vm), (addr), (pte_flags))
+#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
+#define amdgpu_gmc_get_dcc_alignment(adev) ({ \
+ typeof(adev) _adev = (adev); \
+ _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \
+})
+
+/**
+ * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
+ *
+ * @gmc: amdgpu_gmc pointer
+ *
+ * Returns:
+ * True if full VRAM is visible through the BAR
+ */
+static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
+{
+ WARN_ON(gmc->real_vram_size < gmc->visible_vram_size);
+
+ return (gmc->real_vram_size == gmc->visible_vram_size);
+}
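+/*
+ * Editor's note (illustrative): callers typically use this to decide whether
+ * CPU-visible placement restrictions are needed at all, e.g.
+ *     if (!amdgpu_gmc_vram_full_visible(&adev->gmc))
+ *             ... restrict CPU-accessed BOs to visible VRAM ...
+ */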
+
+/**
+ * amdgpu_gmc_sign_extend - sign extend the given gmc address
+ *
+ * @addr: address to extend
+ */
+static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
+{
+ if (addr >= AMDGPU_GMC_HOLE_START)
+ addr |= AMDGPU_GMC_HOLE_END;
+
+ return addr;
+}
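+/*
+ * Editor's note (illustrative): for any addr at or above
+ * AMDGPU_GMC_HOLE_START whose upper 16 bits are zero, sign extension and
+ * AMDGPU_GMC_HOLE_MASK are inverses of each other:
+ *     (amdgpu_gmc_sign_extend(addr) & AMDGPU_GMC_HOLE_MASK) == addr
+ */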
+
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev);
+int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev);
+void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
+ uint64_t *addr, uint64_t *flags);
+int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+ uint32_t gpu_page_idx, uint64_t addr,
+ uint64_t flags);
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
+uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
+void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc);
+void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ u64 base);
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc,
+ enum amdgpu_gart_placement gart_placement);
+void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
+void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
+bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih, uint64_t addr,
+ uint16_t pasid, uint64_t timestamp);
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid);
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
+int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
+void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type);
+int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type, bool all_hub,
+ uint32_t inst);
+void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask,
+ uint32_t xcc_inst);
+
+extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
+extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
+
+extern void
+amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
+ bool enable);
+
+void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev);
+
+void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev);
+uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
+uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
+int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
+
+int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges,
+ uint8_t *exp_ranges);
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode);
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev);
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev);
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev);
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes);
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev);
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev);
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
new file mode 100644
index 000000000000..895c1e4c6747
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "amdgpu.h"
+
+static inline struct amdgpu_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
+{
+ return container_of(man, struct amdgpu_gtt_mgr, manager);
+}
+
+/**
+ * DOC: mem_info_gtt_total
+ *
+ * The amdgpu driver provides a sysfs API for reporting the current total
+ * size of the GTT.
+ * The file mem_info_gtt_total is used for this and returns the total size
+ * of the GTT block, in bytes.
+ */
+static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man;
+
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ return sysfs_emit(buf, "%llu\n", man->size);
+}
+
+/**
+ * DOC: mem_info_gtt_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting the current total
+ * amount of used GTT.
+ * The file mem_info_gtt_used is used for this and returns the currently
+ * used size of the GTT block, in bytes.
+ */
+static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man = &adev->mman.gtt_mgr.manager;
+
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
+}
+
+static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
+ amdgpu_mem_info_gtt_total_show, NULL);
+static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
+ amdgpu_mem_info_gtt_used_show, NULL);
+
+static struct attribute *amdgpu_gtt_mgr_attributes[] = {
+ &dev_attr_mem_info_gtt_total.attr,
+ &dev_attr_mem_info_gtt_used.attr,
+ NULL
+};
+
+const struct attribute_group amdgpu_gtt_mgr_attr_group = {
+ .attrs = amdgpu_gtt_mgr_attributes
+};
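+/*
+ * Usage example (editor's note; the exact sysfs path depends on the card
+ * index and system layout):
+ *     $ cat /sys/class/drm/card0/device/mem_info_gtt_total
+ *     $ cat /sys/class/drm/card0/device/mem_info_gtt_used
+ */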
+
+/**
+ * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
+ *
+ * @res: the mem object to check
+ *
+ * Check if a mem object already has address space allocated.
+ */
+bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
+{
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+
+ return drm_mm_node_allocated(&node->mm_nodes[0]);
+}
+
+/**
+ * amdgpu_gtt_mgr_new - allocate a new node
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: the resulting mem object
+ *
+ * Dummy, allocate the node but no space for it yet.
+ */
+static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+ uint32_t num_pages = PFN_UP(tbo->base.size);
+ struct ttm_range_mgr_node *node;
+ int r;
+
+ node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, &node->base);
+ if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+ ttm_resource_manager_usage(man) > man->size) {
+ r = -ENOSPC;
+ goto err_free;
+ }
+
+ if (place->lpfn) {
+ spin_lock(&mgr->lock);
+ r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0],
+ num_pages, tbo->page_alignment,
+ 0, place->fpfn, place->lpfn,
+ DRM_MM_INSERT_BEST);
+ spin_unlock(&mgr->lock);
+ if (unlikely(r))
+ goto err_free;
+
+ node->base.start = node->mm_nodes[0].start;
+ } else {
+ node->mm_nodes[0].start = 0;
+ node->mm_nodes[0].size = PFN_UP(node->base.size);
+ node->base.start = AMDGPU_BO_INVALID_OFFSET;
+ }
+
+ *res = &node->base;
+ return 0;
+
+err_free:
+ ttm_resource_fini(man, &node->base);
+ kfree(node);
+ return r;
+}
+
+/**
+ * amdgpu_gtt_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @res: TTM memory object
+ *
+ * Free the allocated GTT again.
+ */
+static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+ struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+
+ spin_lock(&mgr->lock);
+ if (drm_mm_node_allocated(&node->mm_nodes[0]))
+ drm_mm_remove_node(&node->mm_nodes[0]);
+ spin_unlock(&mgr->lock);
+
+ ttm_resource_fini(man, res);
+ kfree(node);
+}
+
+/**
+ * amdgpu_gtt_mgr_recover - re-init gart
+ *
+ * @mgr: amdgpu_gtt_mgr pointer
+ *
+ * Re-init the gart for each known BO in the GTT.
+ */
+void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
+{
+ struct ttm_range_mgr_node *node;
+ struct drm_mm_node *mm_node;
+ struct amdgpu_device *adev;
+
+ adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
+ spin_lock(&mgr->lock);
+ drm_mm_for_each_node(mm_node, &mgr->mm) {
+ node = container_of(mm_node, typeof(*node), mm_nodes[0]);
+ amdgpu_ttm_recover_gart(node->base.bo);
+ }
+ spin_unlock(&mgr->lock);
+}
+
+/**
+ * amdgpu_gtt_mgr_intersects - test for intersection
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified intersection test; the only interesting question is whether a
+ * GART address is needed or not.
+ */
+static bool amdgpu_gtt_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
+/**
+ * amdgpu_gtt_mgr_compatible - test for compatibility
+ *
+ * @man: Our manager object
+ * @res: The resource to test
+ * @place: The place for the new allocation
+ * @size: The size of the new allocation
+ *
+ * Simplified compatibility test.
+ */
+static bool amdgpu_gtt_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ return !place->lpfn || amdgpu_gtt_mgr_has_gart_addr(res);
+}
+
+/**
+ * amdgpu_gtt_mgr_debug - dump the GTT table
+ *
+ * @man: TTM memory type manager
+ * @printer: DRM printer to use
+ *
+ * Dump the drm_mm table content using the given DRM printer.
+ */
+static void amdgpu_gtt_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+ struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
+
+ spin_lock(&mgr->lock);
+ drm_mm_print(&mgr->mm, printer);
+ spin_unlock(&mgr->lock);
+}
+
+static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
+ .alloc = amdgpu_gtt_mgr_new,
+ .free = amdgpu_gtt_mgr_del,
+ .intersects = amdgpu_gtt_mgr_intersects,
+ .compatible = amdgpu_gtt_mgr_compatible,
+ .debug = amdgpu_gtt_mgr_debug
+};
+
+/**
+ * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ * @gtt_size: maximum size of GTT
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
+{
+ struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ uint64_t start, size;
+
+ man->use_tt = true;
+ man->func = &amdgpu_gtt_mgr_func;
+
+ ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size);
+
+ start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+ start += amdgpu_vce_required_gart_pages(adev);
+ size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
+ drm_mm_init(&mgr->mm, start, size);
+ spin_lock_init(&mgr->lock);
+
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_gtt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager. Bails out early without tearing down
+ * the drm_mm if ranges are still allocated inside it.
+ */
+void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ spin_lock(&mgr->lock);
+ drm_mm_takedown(&mgr->mm);
+ spin_unlock(&mgr->lock);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
new file mode 100644
index 000000000000..5a60d69a3e1f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_hdp_ras *ras;
+
+ if (!adev->hdp.ras)
+ return 0;
+
+ ras = adev->hdp.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register hdp ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "hdp");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__HDP;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->hdp.ras_if = &ras->ras_block.ras_comm;
+
+ /* hdp ras follows amdgpu_ras_block_late_init_default for late init */
+ return 0;
+}
+
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ } else {
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ }
+}
+
+void amdgpu_hdp_invalidate(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->asic_funcs && adev->asic_funcs->invalidate_hdp)
+ adev->asic_funcs->invalidate_hdp(adev, ring);
+ else if (adev->hdp.funcs && adev->hdp.funcs->invalidate_hdp)
+ adev->hdp.funcs->invalidate_hdp(adev, ring);
+}
+
+void amdgpu_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->asic_funcs && adev->asic_funcs->flush_hdp)
+ adev->asic_funcs->flush_hdp(adev, ring);
+ else if (adev->hdp.funcs && adev->hdp.funcs->flush_hdp)
+ adev->hdp.funcs->flush_hdp(adev, ring);
+}
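+/*
+ * Editor's note (illustrative): callers do not choose between the asic-level
+ * and hdp-level hooks themselves, they simply call e.g.
+ *     amdgpu_hdp_flush(adev, ring);
+ * and the wrapper dispatches to whichever callback is implemented.
+ */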
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
new file mode 100644
index 000000000000..d9f488fa76b9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_HDP_H__
+#define __AMDGPU_HDP_H__
+#include "amdgpu_ras.h"
+
+struct amdgpu_hdp_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_hdp_funcs {
+ void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+ void (*invalidate_hdp)(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+ void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
+ void (*init_registers)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_hdp {
+ struct ras_common_if *ras_if;
+ const struct amdgpu_hdp_funcs *funcs;
+ struct amdgpu_hdp_ras *ras;
+};
+
+int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_hdp_invalidate(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+void amdgpu_hdp_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+#endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
new file mode 100644
index 000000000000..90d26d820bac
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+/**
+ * DOC: MMU Notifier
+ *
+ * For coherent userptr handling the driver registers an MMU notifier to be
+ * informed about updates on the page tables of a process.
+ *
+ * When somebody tries to invalidate the page tables, we block the update
+ * until all operations on the pages in question are completed; then those
+ * pages are marked as accessed and also dirty if it wasn't a read-only
+ * access.
+ *
+ * New command submissions using the userptrs in question are delayed until
+ * all page table invalidations are completed and we once more see a coherent
+ * process address space.
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_hmm.h"
+
+#define MAX_WALK_BYTE (2UL << 30)
+
+/**
+ * amdgpu_hmm_invalidate_gfx - callback to notify about mm change
+ *
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
+ *
+ * Block for operations on BOs to finish and mark pages as accessed and
+ * potentially dirty.
+ */
+static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ long r;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ mutex_lock(&adev->notifier_lock);
+
+ mmu_interval_set_seq(mni, cur_seq);
+
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops amdgpu_hmm_gfx_ops = {
+ .invalidate = amdgpu_hmm_invalidate_gfx,
+};
+
+/**
+ * amdgpu_hmm_invalidate_hsa - callback to notify about mm change
+ *
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
+ *
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
+ */
+static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = {
+ .invalidate = amdgpu_hmm_invalidate_hsa,
+};
+
+/**
+ * amdgpu_hmm_register - register a BO for notifier updates
+ *
+ * @bo: amdgpu buffer object
+ * @addr: userptr addr we should monitor
+ *
+ * Registers an MMU interval notifier for the given BO at the specified address.
+ * Returns 0 on success, -ERRNO if anything goes wrong.
+ */
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
+{
+ int r;
+
+ if (bo->kfd_bo)
+ r = mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_hmm_hsa_ops);
+ else
+ r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_hmm_gfx_ops);
+ if (r)
+ /*
+ * Make sure amdgpu_hmm_unregister() doesn't call
+ * mmu_interval_notifier_remove() when the notifier isn't properly
+ * initialized.
+ */
+ bo->notifier.mm = NULL;
+
+ return r;
+}
+
+/**
+ * amdgpu_hmm_unregister - unregister a BO for notifier updates
+ *
+ * @bo: amdgpu buffer object
+ *
+ * Remove any registration of mmu notifier updates from the buffer object.
+ */
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
+{
+ if (!bo->notifier.mm)
+ return;
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
+}
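+/*
+ * Editor's note (illustrative): registration and unregistration bracket the
+ * lifetime of a userptr BO, e.g.
+ *     r = amdgpu_hmm_register(bo, userptr_addr);
+ *     ...
+ *     amdgpu_hmm_unregister(bo);
+ */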
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ uint64_t start, uint64_t npages, bool readonly,
+ void *owner,
+ struct amdgpu_hmm_range *range)
+{
+ unsigned long end;
+ unsigned long timeout;
+ unsigned long *pfns;
+ int r = 0;
+ struct hmm_range *hmm_range = &range->hmm_range;
+
+ pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns)) {
+ r = -ENOMEM;
+ goto out_free_range;
+ }
+
+ hmm_range->notifier = notifier;
+ hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+ if (!readonly)
+ hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+ hmm_range->hmm_pfns = pfns;
+ hmm_range->start = start;
+ end = start + npages * PAGE_SIZE;
+ hmm_range->dev_private_owner = owner;
+
+ do {
+ hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+
+ pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
+ hmm_range->start, hmm_range->end);
+
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+
+retry:
+ hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+ r = hmm_range_fault(hmm_range);
+ if (unlikely(r)) {
+ if (r == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_free_pfns;
+ }
+
+ if (hmm_range->end == end)
+ break;
+ hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
+ hmm_range->start = hmm_range->end;
+ } while (hmm_range->end < end);
+
+ hmm_range->start = start;
+ hmm_range->hmm_pfns = pfns;
+
+ return 0;
+
+out_free_pfns:
+ kvfree(pfns);
+ hmm_range->hmm_pfns = NULL;
+out_free_range:
+ if (r == -EBUSY)
+ r = -EAGAIN;
+ return r;
+}
+
+/**
+ * amdgpu_hmm_range_valid - check if an HMM range is still valid
+ * @range: pointer to the &struct amdgpu_hmm_range to validate
+ *
+ * Determines whether the given HMM range @range is still valid by
+ * checking for invalidations via the MMU notifier sequence. This is
+ * typically used to verify that the range has not been invalidated
+ * by concurrent address space updates before it is accessed.
+ *
+ * Return:
+ * * true if @range is valid and can be used safely
+ * * false if @range is NULL or has been invalidated
+ */
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
+{
+ if (!range)
+ return false;
+
+ return !mmu_interval_read_retry(range->hmm_range.notifier,
+ range->hmm_range.notifier_seq);
+}
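+/*
+ * Editor's sketch of the typical usage pattern (illustrative only, error
+ * handling trimmed): fault the pages, do the work, then confirm that no
+ * invalidation raced with us before committing the result:
+ *
+ *     r = amdgpu_hmm_range_get_pages(&bo->notifier, start, npages,
+ *                                    false, owner, range);
+ *     if (!r) {
+ *             ... use range->hmm_range.hmm_pfns ...
+ *             if (!amdgpu_hmm_range_valid(range))
+ *                     goto retry;
+ *     }
+ */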
+
+/**
+ * amdgpu_hmm_range_alloc - allocate and initialize an AMDGPU HMM range
+ * @bo: optional buffer object to associate with this HMM range
+ *
+ * Allocates memory for amdgpu_hmm_range and associates it with the @bo passed.
+ * The reference count of the @bo is incremented.
+ *
+ * Return:
+ * Pointer to a newly allocated struct amdgpu_hmm_range on success,
+ * or NULL if memory allocation fails.
+ */
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ struct amdgpu_hmm_range *range;
+
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (!range)
+ return NULL;
+
+ range->bo = amdgpu_bo_ref(bo);
+ return range;
+}
+
+/**
+ * amdgpu_hmm_range_free - release an AMDGPU HMM range
+ * @range: pointer to the range object to free
+ *
+ * Releases all resources held by @range, including the associated
+ * hmm_pfns, and drops the reference on the associated bo, if any.
+ *
+ * Return: void
+ */
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range)
+{
+ if (!range)
+ return;
+
+ kvfree(range->hmm_range.hmm_pfns);
+ amdgpu_bo_unref(&range->bo);
+ kfree(range);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
new file mode 100644
index 000000000000..140bc9cd57b4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_MN_H__
+#define __AMDGPU_MN_H__
+
+#include <linux/types.h>
+#include <linux/hmm.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+#include <linux/interval_tree.h>
+#include <linux/mmu_notifier.h>
+
+struct amdgpu_hmm_range {
+ struct hmm_range hmm_range;
+ struct amdgpu_bo *bo;
+};
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ uint64_t start, uint64_t npages, bool readonly,
+ void *owner,
+ struct amdgpu_hmm_range *range);
+
+#if defined(CONFIG_HMM_MIRROR)
+bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range);
+struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo);
+void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range);
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
+#else
+static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
+{
+ DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
+ "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
+ return -ENODEV;
+}
+
+static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {}
+
+static inline bool amdgpu_hmm_range_valid(struct amdgpu_hmm_range *range)
+{
+ return false;
+}
+
+static inline struct amdgpu_hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo)
+{
+ return NULL;
+}
+
+static inline void amdgpu_hmm_range_free(struct amdgpu_hmm_range *range) {}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
new file mode 100644
index 000000000000..9cb72f0c5277
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <linux/pci.h>
+
+#include <drm/drm_edid.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_i2c.h"
+#include "amdgpu_atombios.h"
+#include "atom.h"
+#include "atombios_dp.h"
+#include "atombios_i2c.h"
+
+/* bit banging i2c */
+static int amdgpu_i2c_pre_xfer(struct i2c_adapter *i2c_adap)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t temp;
+
+ mutex_lock(&i2c->mutex);
+
+ /* switch the pads to ddc mode */
+ if (rec->hw_capable) {
+ temp = RREG32(rec->mask_clk_reg);
+ temp &= ~(1 << 16);
+ WREG32(rec->mask_clk_reg, temp);
+ }
+
+ /* clear the output pin values */
+ temp = RREG32(rec->a_clk_reg) & ~rec->a_clk_mask;
+ WREG32(rec->a_clk_reg, temp);
+
+ temp = RREG32(rec->a_data_reg) & ~rec->a_data_mask;
+ WREG32(rec->a_data_reg, temp);
+
+ /* set the pins to input */
+ temp = RREG32(rec->en_clk_reg) & ~rec->en_clk_mask;
+ WREG32(rec->en_clk_reg, temp);
+
+ temp = RREG32(rec->en_data_reg) & ~rec->en_data_mask;
+ WREG32(rec->en_data_reg, temp);
+
+ /* mask the gpio pins for software use */
+ temp = RREG32(rec->mask_clk_reg) | rec->mask_clk_mask;
+ WREG32(rec->mask_clk_reg, temp);
+ temp = RREG32(rec->mask_clk_reg);
+
+ temp = RREG32(rec->mask_data_reg) | rec->mask_data_mask;
+ WREG32(rec->mask_data_reg, temp);
+ temp = RREG32(rec->mask_data_reg);
+
+ return 0;
+}
+
+static void amdgpu_i2c_post_xfer(struct i2c_adapter *i2c_adap)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t temp;
+
+ /* unmask the gpio pins for software use */
+ temp = RREG32(rec->mask_clk_reg) & ~rec->mask_clk_mask;
+ WREG32(rec->mask_clk_reg, temp);
+ temp = RREG32(rec->mask_clk_reg);
+
+ temp = RREG32(rec->mask_data_reg) & ~rec->mask_data_mask;
+ WREG32(rec->mask_data_reg, temp);
+ temp = RREG32(rec->mask_data_reg);
+
+ mutex_unlock(&i2c->mutex);
+}
+
+static int amdgpu_i2c_get_clock(void *i2c_priv)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_priv;
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t val;
+
+ /* read the value off the pin */
+ val = RREG32(rec->y_clk_reg);
+ val &= rec->y_clk_mask;
+
+ return (val != 0);
+}
+
+
+static int amdgpu_i2c_get_data(void *i2c_priv)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_priv;
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t val;
+
+ /* read the value off the pin */
+ val = RREG32(rec->y_data_reg);
+ val &= rec->y_data_mask;
+
+ return (val != 0);
+}
+
+static void amdgpu_i2c_set_clock(void *i2c_priv, int clock)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_priv;
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t val;
+
+ /* set pin direction */
+ val = RREG32(rec->en_clk_reg) & ~rec->en_clk_mask;
+ val |= clock ? 0 : rec->en_clk_mask;
+ WREG32(rec->en_clk_reg, val);
+}
+
+static void amdgpu_i2c_set_data(void *i2c_priv, int data)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_priv;
+ struct amdgpu_device *adev = drm_to_adev(i2c->dev);
+ struct amdgpu_i2c_bus_rec *rec = &i2c->rec;
+ uint32_t val;
+
+ /* set pin direction */
+ val = RREG32(rec->en_data_reg) & ~rec->en_data_mask;
+ val |= data ? 0 : rec->en_data_mask;
+ WREG32(rec->en_data_reg, val);
+}
+
+static const struct i2c_algorithm amdgpu_atombios_i2c_algo = {
+ .master_xfer = amdgpu_atombios_i2c_xfer,
+ .functionality = amdgpu_atombios_i2c_func,
+};
+
+struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
+ const struct amdgpu_i2c_bus_rec *rec,
+ const char *name)
+{
+ struct amdgpu_i2c_chan *i2c;
+ int ret;
+
+ /* don't add the mm_i2c bus unless hw_i2c is enabled */
+ if (rec->mm_i2c && (amdgpu_hw_i2c == 0))
+ return NULL;
+
+ i2c = kzalloc(sizeof(struct amdgpu_i2c_chan), GFP_KERNEL);
+ if (i2c == NULL)
+ return NULL;
+
+ i2c->rec = *rec;
+ i2c->adapter.owner = THIS_MODULE;
+ i2c->adapter.dev.parent = dev->dev;
+ i2c->dev = dev;
+ i2c_set_adapdata(&i2c->adapter, i2c);
+ mutex_init(&i2c->mutex);
+ if (rec->hw_capable &&
+ amdgpu_hw_i2c) {
+ /* hw i2c using atom */
+ snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
+ "AMDGPU i2c hw bus %s", name);
+ i2c->adapter.algo = &amdgpu_atombios_i2c_algo;
+ ret = devm_i2c_add_adapter(dev->dev, &i2c->adapter);
+ if (ret)
+ goto out_free;
+ } else {
+ /* set the amdgpu bit adapter */
+ snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
+ "AMDGPU i2c bit bus %s", name);
+ i2c->adapter.algo_data = &i2c->bit;
+ i2c->bit.pre_xfer = amdgpu_i2c_pre_xfer;
+ i2c->bit.post_xfer = amdgpu_i2c_post_xfer;
+ i2c->bit.setsda = amdgpu_i2c_set_data;
+ i2c->bit.setscl = amdgpu_i2c_set_clock;
+ i2c->bit.getsda = amdgpu_i2c_get_data;
+ i2c->bit.getscl = amdgpu_i2c_get_clock;
+ i2c->bit.udelay = 10;
+ i2c->bit.timeout = usecs_to_jiffies(2200); /* from VESA */
+ i2c->bit.data = i2c;
+ ret = i2c_bit_add_bus(&i2c->adapter);
+ if (ret) {
+ DRM_ERROR("Failed to register bit i2c %s\n", name);
+ goto out_free;
+ }
+ }
+
+ return i2c;
+out_free:
+ kfree(i2c);
+ return NULL;
+
+}
+
+void amdgpu_i2c_init(struct amdgpu_device *adev)
+{
+ if (!adev->is_atom_fw) {
+ if (!amdgpu_device_has_dc_support(adev)) {
+ amdgpu_atombios_i2c_init(adev);
+ } else {
+ switch (adev->asic_type) {
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ amdgpu_atombios_oem_i2c_init(adev, 0x97);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+/* remove all the buses */
+void amdgpu_i2c_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++)
+ if (adev->i2c_bus[i])
+ adev->i2c_bus[i] = NULL;
+}
+
+/* looks up bus based on id */
+struct amdgpu_i2c_chan *
+amdgpu_i2c_lookup(struct amdgpu_device *adev,
+ const struct amdgpu_i2c_bus_rec *i2c_bus)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
+ if (adev->i2c_bus[i] &&
+ (adev->i2c_bus[i]->rec.i2c_id == i2c_bus->i2c_id)) {
+ return adev->i2c_bus[i];
+ }
+ }
+ return NULL;
+}
+
+static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
+ u8 slave_addr,
+ u8 addr,
+ u8 *val)
+{
+ u8 out_buf[2];
+ u8 in_buf[2];
+ struct i2c_msg msgs[] = {
+ {
+ .addr = slave_addr,
+ .flags = 0,
+ .len = 1,
+ .buf = out_buf,
+ },
+ {
+ .addr = slave_addr,
+ .flags = I2C_M_RD,
+ .len = 1,
+ .buf = in_buf,
+ }
+ };
+
+ out_buf[0] = addr;
+ out_buf[1] = 0;
+
+ if (i2c_transfer(&i2c_bus->adapter, msgs, 2) != 2) {
+ DRM_DEBUG("i2c 0x%02x read failed\n", addr);
+ return -EIO;
+ }
+
+ *val = in_buf[0];
+ DRM_DEBUG("val = 0x%02x\n", *val);
+
+ return 0;
+}
+
+static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
+ u8 slave_addr,
+ u8 addr,
+ u8 val)
+{
+ uint8_t out_buf[2];
+ struct i2c_msg msg = {
+ .addr = slave_addr,
+ .flags = 0,
+ .len = 2,
+ .buf = out_buf,
+ };
+
+ out_buf[0] = addr;
+ out_buf[1] = val;
+
+ if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) {
+ DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* ddc router switching */
+void
+amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connector)
+{
+ u8 val = 0;
+
+ if (!amdgpu_connector->router.ddc_valid)
+ return;
+
+ if (!amdgpu_connector->router_bus)
+ return;
+
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x3, &val))
+ return;
+ val &= ~amdgpu_connector->router.ddc_mux_control_pin;
+ amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x3, val);
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x1, &val))
+ return;
+ val &= ~amdgpu_connector->router.ddc_mux_control_pin;
+ val |= amdgpu_connector->router.ddc_mux_state;
+ amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x1, val);
+}
+
+/* clock/data router switching */
+void
+amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector)
+{
+ u8 val;
+
+ if (!amdgpu_connector->router.cd_valid)
+ return;
+
+ if (!amdgpu_connector->router_bus)
+ return;
+
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x3, &val))
+ return;
+ val &= ~amdgpu_connector->router.cd_mux_control_pin;
+ amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x3, val);
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x1, &val))
+ return;
+ val &= ~amdgpu_connector->router.cd_mux_control_pin;
+ val |= amdgpu_connector->router.cd_mux_state;
+ amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
+ amdgpu_connector->router.i2c_addr,
+ 0x1, val);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
new file mode 100644
index 000000000000..1d3d3806e0dd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_I2C_H__
+#define __AMDGPU_I2C_H__
+
+struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
+ const struct amdgpu_i2c_bus_rec *rec,
+ const char *name);
+void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c);
+void amdgpu_i2c_init(struct amdgpu_device *adev);
+void amdgpu_i2c_fini(struct amdgpu_device *adev);
+struct amdgpu_i2c_chan *
+amdgpu_i2c_lookup(struct amdgpu_device *adev,
+ const struct amdgpu_i2c_bus_rec *i2c_bus);
+void
+amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *connector);
+void
+amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *connector);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
new file mode 100644
index 000000000000..586a58facca1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ * Christian König
+ */
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_trace.h"
+
+#define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
+#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT msecs_to_jiffies(2000)
+
+/*
+ * IB
+ * IBs (Indirect Buffers) are areas of GPU-accessible memory where
+ * commands are stored. You can put a pointer to the IB in the
+ * command ring and the hw will fetch the commands from the IB
+ * and execute them. Generally, userspace acceleration drivers
+ * produce command buffers which are sent to the kernel and
+ * put in IBs for execution by the requested ring.
+ */
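+/*
+ * Editor's sketch of the typical flow (illustrative only; error handling and
+ * the exact pool choice omitted):
+ *     struct amdgpu_ib ib;
+ *     struct dma_fence *f;
+ *
+ *     amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ *     ... fill ib.ptr with packets and set ib.length_dw ...
+ *     amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ *     dma_fence_wait(f, false);
+ *     amdgpu_ib_free(&ib, f);
+ *     dma_fence_put(f);
+ */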
+
+/**
+ * amdgpu_ib_get - request an IB (Indirect Buffer)
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm pointer
+ * @size: requested IB size
+ * @pool_type: IB pool type (delayed, immediate, direct)
+ * @ib: IB object returned
+ *
+ * Request an IB (all asics). IBs are allocated using the
+ * suballocator.
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned int size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_ib *ib)
+{
+ int r;
+
+ if (size) {
+ r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
+ &ib->sa_bo, size);
+ if (r) {
+ dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
+ return r;
+ }
+
+ ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo);
+ /* flush the cache before commit the IB */
+ ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
+
+ if (!vm)
+ ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ib_free - free an IB (Indirect Buffer)
+ *
+ * @ib: IB object to free
+ * @f: the fence the IB's SA bo must wait on before its memory is reused
+ *
+ * Free an IB (all asics).
+ */
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
+{
+ amdgpu_sa_bo_free(&ib->sa_bo, f);
+}
+
+/**
+ * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
+ *
+ * @ring: ring the IBs are scheduled on
+ * @num_ibs: number of IBs to schedule
+ * @ibs: IB objects to schedule
+ * @job: job to schedule
+ * @f: fence created during this submission
+ *
+ * Schedule an IB on the associated ring (all asics).
+ * Returns 0 on success, error on failure.
+ *
+ * On SI, there are two parallel engines fed from the primary ring,
+ * the CE (Constant Engine) and the DE (Drawing Engine). Since
+ * resource descriptors have moved to memory, the CE allows you to
+ * prime the caches while the DE is updating register state so that
+ * the resource descriptors will be already in cache when the draw is
+ * processed. To accomplish this, the userspace driver submits two
+ * IBs, one for the CE and one for the DE. If there is a CE IB (called
+ * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
+ * to SI there was just a DE IB.
+ */
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
+ struct amdgpu_ib *ibs, struct amdgpu_job *job,
+ struct dma_fence **f)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib *ib = &ibs[0];
+ struct dma_fence *tmp = NULL;
+ struct amdgpu_fence *af;
+ bool need_ctx_switch;
+ struct amdgpu_vm *vm;
+ uint64_t fence_ctx;
+ uint32_t status = 0, alloc_size;
+ unsigned int fence_flags = 0;
+ bool secure, init_shadow;
+ u64 shadow_va, csa_va, gds_va;
+ int vmid = AMDGPU_JOB_GET_VMID(job);
+ bool need_pipe_sync = false;
+ unsigned int cond_exec;
+ unsigned int i;
+ int r = 0;
+
+ if (num_ibs == 0)
+ return -EINVAL;
+
+ /* ring tests don't use a job */
+ if (job) {
+ vm = job->vm;
+ fence_ctx = job->base.s_fence ?
+ job->base.s_fence->finished.context : 0;
+ shadow_va = job->shadow_va;
+ csa_va = job->csa_va;
+ gds_va = job->gds_va;
+ init_shadow = job->init_shadow;
+ af = job->hw_fence;
+ /* Save the context of the job for reset handling.
+ * The driver needs this so it can skip the ring
+ * contents for guilty contexts.
+ */
+ af->context = fence_ctx;
+ /* the vm fence is also part of the job's context */
+ job->hw_vm_fence->context = fence_ctx;
+ } else {
+ vm = NULL;
+ fence_ctx = 0;
+ shadow_va = 0;
+ csa_va = 0;
+ gds_va = 0;
+ init_shadow = false;
+ af = kzalloc(sizeof(*af), GFP_ATOMIC);
+ if (!af)
+ return -ENOMEM;
+ }
+
+ if (!ring->sched.ready) {
+ dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
+ r = -EINVAL;
+ goto free_fence;
+ }
+
+ if (vm && !job->vmid) {
+ dev_err(adev->dev, "VM IB without ID\n");
+ r = -EINVAL;
+ goto free_fence;
+ }
+
+ if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
+ (!ring->funcs->secure_submission_supported)) {
+ dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
+ r = -EINVAL;
+ goto free_fence;
+ }
+
+ alloc_size = ring->funcs->emit_frame_size + num_ibs *
+ ring->funcs->emit_ib_size;
+
+ r = amdgpu_ring_alloc(ring, alloc_size);
+ if (r) {
+ dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
+ goto free_fence;
+ }
+
+ need_ctx_switch = ring->current_ctx != fence_ctx;
+ if (ring->funcs->emit_pipeline_sync && job &&
+ ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
+ need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
+ need_pipe_sync = true;
+
+ if (tmp)
+ trace_amdgpu_ib_pipe_sync(job, tmp);
+
+ dma_fence_put(tmp);
+ }
+
+ if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync)
+ ring->funcs->emit_mem_sync(ring);
+
+ if (ring->funcs->emit_wave_limit &&
+ ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ ring->funcs->emit_wave_limit(ring, true);
+
+ if (ring->funcs->insert_start)
+ ring->funcs->insert_start(ring);
+
+ if (job) {
+ r = amdgpu_vm_flush(ring, job, need_pipe_sync);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ return r;
+ }
+ }
+
+ amdgpu_ring_ib_begin(ring);
+
+ if (ring->funcs->emit_gfx_shadow)
+ amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+ init_shadow, vmid);
+
+ if (ring->funcs->init_cond_exec)
+ cond_exec = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
+
+ amdgpu_device_flush_hdp(adev, ring);
+
+ if (need_ctx_switch)
+ status |= AMDGPU_HAVE_CTX_SWITCH;
+
+ if (job && ring->funcs->emit_cntxcntl) {
+ status |= job->preamble_status;
+ status |= job->preemption_status;
+ amdgpu_ring_emit_cntxcntl(ring, status);
+ }
+
+ /* Set up the initial TMZ (Trusted Memory Zone) state and send it off. */
+ secure = false;
+ if (job && ring->funcs->emit_frame_cntl) {
+ secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
+ amdgpu_ring_emit_frame_cntl(ring, true, secure);
+ }
+
+ for (i = 0; i < num_ibs; ++i) {
+ ib = &ibs[i];
+
+ if (job && ring->funcs->emit_frame_cntl) {
+ if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+ amdgpu_ring_emit_frame_cntl(ring, false, secure);
+ secure = !secure;
+ amdgpu_ring_emit_frame_cntl(ring, true, secure);
+ }
+ }
+
+ amdgpu_ring_emit_ib(ring, job, ib, status);
+ status &= ~AMDGPU_HAVE_CTX_SWITCH;
+ }
+
+ if (job && ring->funcs->emit_frame_cntl)
+ amdgpu_ring_emit_frame_cntl(ring, false, secure);
+
+ amdgpu_device_invalidate_hdp(adev, ring);
+
+ if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+ fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+ /* wrap the last IB with fence */
+ if (job && job->uf_addr) {
+ amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
+ fence_flags | AMDGPU_FENCE_FLAG_64BIT);
+ }
+
+ if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
+ amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
+ amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
+ }
+
+ r = amdgpu_fence_emit(ring, af, fence_flags);
+ if (r) {
+ dev_err(adev->dev, "failed to emit fence (%d)\n", r);
+ if (job && job->vmid)
+ amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
+ amdgpu_ring_undo(ring);
+ return r;
+ }
+ *f = &af->base;
+ /* get a ref for the job */
+ if (job)
+ dma_fence_get(*f);
+
+ if (ring->funcs->insert_end)
+ ring->funcs->insert_end(ring);
+
+ amdgpu_ring_patch_cond_exec(ring, cond_exec);
+
+ ring->current_ctx = fence_ctx;
+ if (job && ring->funcs->emit_switch_buffer)
+ amdgpu_ring_emit_switch_buffer(ring);
+
+ if (ring->funcs->emit_wave_limit &&
+ ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ ring->funcs->emit_wave_limit(ring, false);
+
+ /* Save the wptr associated with this fence.
+ * This must be done last so that resets work properly:
+ * the saved wptr tells us which ring contents to back up
+ * after the queue is reset.
+ */
+ amdgpu_fence_save_wptr(af);
+
+ amdgpu_ring_ib_end(ring);
+ amdgpu_ring_commit(ring);
+
+ return 0;
+
+free_fence:
+ if (!job)
+ kfree(af);
+ return r;
+}
+
+/**
+ * amdgpu_ib_pool_init - Init the IB (Indirect Buffer) pool
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the suballocator to manage a pool of memory
+ * for use as IBs (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ib_pool_init(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ if (adev->ib_pool_ready)
+ return 0;
+
+ for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
+ r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
+ AMDGPU_IB_POOL_SIZE, 256,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r)
+ goto error;
+ }
+ adev->ib_pool_ready = true;
+
+ return 0;
+
+error:
+ while (i--)
+ amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+ return r;
+}
+
+/**
+ * amdgpu_ib_pool_fini - Free the IB (Indirect Buffer) pool
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the suballocator managing the pool of memory
+ * for use as IBs (all asics).
+ */
+void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->ib_pool_ready)
+ return;
+
+ for (i = 0; i < AMDGPU_IB_POOL_MAX; i++)
+ amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+ adev->ib_pool_ready = false;
+}
+
+/**
+ * amdgpu_ib_ring_tests - test IBs on the rings
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Test an IB (Indirect Buffer) on each ring.
+ * If the test fails, disable the ring.
+ * Returns 0 on success, error if the primary GFX ring
+ * IB test fails.
+ */
+int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
+{
+ long tmo_gfx, tmo_mm;
+ int r, ret = 0;
+ unsigned int i;
+
+ tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+ if (amdgpu_sriov_vf(adev)) {
+ /* MM engines on the hypervisor side are not scheduled together with
+ * the CP and SDMA engines, so even in exclusive mode an MM engine
+ * could still be running on another VF. The IB test timeout for MM
+ * engines under SR-IOV therefore needs to be long; 8 seconds should
+ * be enough for the MM engine to come back to this VF.
+ */
+ tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+ }
+
+ if (amdgpu_sriov_runtime(adev)) {
+ /* CP and SDMA engines are scheduled together, so the timeout
+ * needs to be wide enough to cover the time spent waiting for
+ * them to come back while in RUNTIME mode.
+ */
+ tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+ } else if (adev->gmc.xgmi.hive_id) {
+ tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT;
+ }
+
+ for (i = 0; i < adev->num_rings; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ long tmo;
+
+ /* KIQ rings don't have an IB test because we never submit IBs
+ * to them and they have no interrupt support.
+ */
+ if (!ring->sched.ready || !ring->funcs->test_ib)
+ continue;
+
+ if (adev->enable_mes &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ continue;
+
+ /* MM engines need more time */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+ ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+ tmo = tmo_mm;
+ else
+ tmo = tmo_gfx;
+
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (!r) {
+ DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
+ ring->name);
+ continue;
+ }
+
+ ring->sched.ready = false;
+ DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n",
+ ring->name, r);
+
+ if (ring == &adev->gfx.gfx_ring[0]) {
+ /* oh, oh, that's really bad */
+ adev->accel_working = false;
+ return r;
+
+ } else {
+ ret = r;
+ }
+ }
+ return ret;
+}
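
/*
 * Usage sketch (illustrative, not part of this patch): the ring tests
 * are typically run once after the rings are brought up.  A failure of
 * the primary GFX ring IB test is fatal for acceleration, while
 * failures on other rings only disable the affected ring.
 */
static void amdgpu_ib_example_selftest(struct amdgpu_device *adev)
{
        int r = amdgpu_ib_ring_tests(adev);

        if (r)
                dev_err(adev->dev, "IB ring tests failed (%d)\n", r);
}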
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+
+ seq_puts(m, "--------------------- DELAYED ---------------------\n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
+ m);
+ seq_puts(m, "-------------------- IMMEDIATE --------------------\n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
+ m);
+ seq_puts(m, "--------------------- DIRECT ----------------------\n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_sa_info);
+
+#endif
+
+void amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_sa_info", 0444, root, adev,
+ &amdgpu_debugfs_sa_info_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
new file mode 100644
index 000000000000..9cab36322c16
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu_ids.h"
+
+#include <linux/idr.h>
+#include <linux/dma-fence-array.h>
+
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+
+/*
+ * PASID manager
+ *
+ * PASIDs are global address space identifiers that can be shared
+ * between the GPU, an IOMMU and the driver. VMs on different devices
+ * may use the same PASID if they share the same address
+ * space. Therefore PASIDs are allocated using a global IDA. VMs are
+ * looked up from the PASID per amdgpu_device.
+ */
+static DEFINE_IDA(amdgpu_pasid_ida);
+
+/* Helper to free pasid from a fence callback */
+struct amdgpu_pasid_cb {
+ struct dma_fence_cb cb;
+ u32 pasid;
+};
+
+/**
+ * amdgpu_pasid_alloc - Allocate a PASID
+ * @bits: Maximum width of the PASID in bits, must be at least 1
+ *
+ * Allocates a PASID of the given width while keeping smaller PASIDs
+ * available if possible.
+ *
+ * Returns a positive integer on success. Returns %-EINVAL if bits==0.
+ * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
+ * memory allocation failure.
+ */
+int amdgpu_pasid_alloc(unsigned int bits)
+{
+ int pasid = -EINVAL;
+
+ for (bits = min(bits, 31U); bits > 0; bits--) {
+ pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1),
+ (1U << bits) - 1, GFP_KERNEL);
+ if (pasid != -ENOSPC)
+ break;
+ }
+
+ if (pasid >= 0)
+ trace_amdgpu_pasid_allocated(pasid);
+
+ return pasid;
+}
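
/*
 * Minimal usage sketch (illustrative, not part of this patch): allocate
 * a PASID up to 16 bits wide for a new address space and release it
 * again.  The surrounding error handling is an assumption made for the
 * example only.
 */
static int amdgpu_pasid_example(void)
{
        int pasid;

        pasid = amdgpu_pasid_alloc(16);
        if (pasid < 0)
                return pasid;   /* -EINVAL, -ENOSPC or -ENOMEM */

        /* ... bind the PASID to a VM and/or the IOMMU here ... */

        amdgpu_pasid_free(pasid);
        return 0;
}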
+
+/**
+ * amdgpu_pasid_free - Free a PASID
+ * @pasid: PASID to free
+ */
+void amdgpu_pasid_free(u32 pasid)
+{
+ trace_amdgpu_pasid_freed(pasid);
+ ida_free(&amdgpu_pasid_ida, pasid);
+}
+
+static void amdgpu_pasid_free_cb(struct dma_fence *fence,
+ struct dma_fence_cb *_cb)
+{
+ struct amdgpu_pasid_cb *cb =
+ container_of(_cb, struct amdgpu_pasid_cb, cb);
+
+ amdgpu_pasid_free(cb->pasid);
+ dma_fence_put(fence);
+ kfree(cb);
+}
+
+/**
+ * amdgpu_pasid_free_delayed - free pasid when fences signal
+ *
+ * @resv: reservation object with the fences to wait for
+ * @pasid: pasid to free
+ *
+ * Free the pasid only after all the fences in resv are signaled.
+ */
+void amdgpu_pasid_free_delayed(struct dma_resv *resv,
+ u32 pasid)
+{
+ struct amdgpu_pasid_cb *cb;
+ struct dma_fence *fence;
+ int r;
+
+ r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
+ if (r)
+ goto fallback;
+
+ if (!fence) {
+ amdgpu_pasid_free(pasid);
+ return;
+ }
+
+ cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb) {
+ /* Last resort when we are OOM */
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ amdgpu_pasid_free(pasid);
+ } else {
+ cb->pasid = pasid;
+ if (dma_fence_add_callback(fence, &cb->cb,
+ amdgpu_pasid_free_cb))
+ amdgpu_pasid_free_cb(fence, &cb->cb);
+ }
+
+ return;
+
+fallback:
+ /* Not enough memory for the delayed delete, as last resort
+ * block for all the fences to complete.
+ */
+ dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ amdgpu_pasid_free(pasid);
+}
+
+/*
+ * VMID manager
+ *
+ * VMIDs are per-VMHUB identifiers used for page table handling.
+ */
+
+/**
+ * amdgpu_vmid_had_gpu_reset - check if a reset occurred since last use
+ *
+ * @adev: amdgpu_device pointer
+ * @id: VMID structure
+ *
+ * Check if a GPU reset occurred since the last use of the VMID.
+ */
+bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
+ struct amdgpu_vmid *id)
+{
+ return id->current_gpu_reset_count !=
+ atomic_read(&adev->gpu_reset_counter);
+}
+
+/* Check if we need to switch to another set of resources */
+static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->gds_base != job->gds_base ||
+ id->gds_size != job->gds_size ||
+ id->gws_base != job->gws_base ||
+ id->gws_size != job->gws_size ||
+ id->oa_base != job->oa_base ||
+ id->oa_size != job->oa_size;
+}
+
+/* Check if the id is compatible with the job */
+static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
+ struct amdgpu_job *job)
+{
+ return id->pd_gpu_addr == job->vm_pd_addr &&
+ !amdgpu_vmid_gds_switch_needed(id, job);
+}
+
+/**
+ * amdgpu_vmid_grab_idle - grab idle VMID
+ *
+ * @ring: ring we want to submit job to
+ * @idle: resulting idle VMID
+ * @fence: fence to wait for if no id could be grabbed
+ *
+ * Try to find an idle VMID; if none is available, return a fence to wait on
+ * in @fence. Returns -ENOMEM when we are out of memory.
+ */
+static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
+ struct amdgpu_vmid **idle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+ /* If anybody is waiting for a VMID let everybody wait for fairness */
+ if (!dma_fence_is_signaled(ring->vmid_wait)) {
+ *fence = dma_fence_get(ring->vmid_wait);
+ return 0;
+ }
+
+ /* Check if we have an idle VMID */
+ list_for_each_entry_reverse((*idle), &id_mgr->ids_lru, list) {
+ /* Don't use per engine and per process VMID at the same time */
+ struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
+ NULL : ring;
+
+ *fence = amdgpu_sync_peek_fence(&(*idle)->active, r);
+ if (!(*fence))
+ return 0;
+ }
+
+ /*
+ * If we can't find an idle VMID to use, wait on a fence from the least
+ * recently used one in the hope that it will become available soon.
+ */
+ *idle = NULL;
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = dma_fence_get(*fence);
+
+ /* This is the reference we return */
+ dma_fence_get(*fence);
+ return 0;
+}
+
+/**
+ * amdgpu_vmid_grab_reserved - try to assign reserved VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @job: job who wants to use the VMID
+ * @id: resulting VMID
+ * @fence: fence to wait for if no id could be grabbed
+ *
+ * Try to assign a reserved VMID.
+ */
+static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_vmid **id,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ uint64_t fence_context = adev->fence_context + ring->idx;
+ bool needs_flush = vm->use_cpu_for_update;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
+ int r;
+
+ *id = vm->reserved_vmid[vmhub];
+ if ((*id)->owner != vm->immediate.fence_context ||
+ !amdgpu_vmid_compatible(*id, job) ||
+ (*id)->flushed_updates < updates ||
+ !(*id)->last_flush ||
+ ((*id)->last_flush->context != fence_context &&
+ !dma_fence_is_signaled((*id)->last_flush)))
+ needs_flush = true;
+
+ if ((*id)->owner != vm->immediate.fence_context ||
+ (!adev->vm_manager.concurrent_flush && needs_flush)) {
+ struct dma_fence *tmp;
+
+ /* Don't use per engine and per process VMID at the
+ * same time
+ */
+ if (adev->vm_manager.concurrent_flush)
+ ring = NULL;
+
+ /* to prevent one context being starved by another context */
+ (*id)->pd_gpu_addr = 0;
+ tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+ if (tmp) {
+ *id = NULL;
+ *fence = dma_fence_get(tmp);
+ return 0;
+ }
+ }
+
+ /* Good, we can use this VMID. Remember this submission as
+ * user of the VMID.
+ */
+ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
+ GFP_ATOMIC);
+ if (r)
+ return r;
+
+ job->vm_needs_flush = needs_flush;
+ job->spm_update_needed = true;
+ return 0;
+}
+
+/**
+ * amdgpu_vmid_grab_used - try to reuse a VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @job: job who wants to use the VMID
+ * @id: resulting VMID
+ *
+ * Try to reuse a VMID for this submission.
+ */
+static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_vmid **id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ uint64_t fence_context = adev->fence_context + ring->idx;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
+ int r;
+
+ job->vm_needs_flush = vm->use_cpu_for_update;
+
+ /* Check if we can use a VMID already assigned to this VM */
+ list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
+ bool needs_flush = vm->use_cpu_for_update;
+
+ /* Check all the prerequisites to using this VMID */
+ if ((*id)->owner != vm->immediate.fence_context)
+ continue;
+
+ if (!amdgpu_vmid_compatible(*id, job))
+ continue;
+
+ if (!(*id)->last_flush ||
+ ((*id)->last_flush->context != fence_context &&
+ !dma_fence_is_signaled((*id)->last_flush)))
+ needs_flush = true;
+
+ if ((*id)->flushed_updates < updates)
+ needs_flush = true;
+
+ if (needs_flush && !adev->vm_manager.concurrent_flush)
+ continue;
+
+ /* Good, we can use this VMID. Remember this submission as
+ * user of the VMID.
+ */
+ r = amdgpu_sync_fence(&(*id)->active,
+ &job->base.s_fence->finished,
+ GFP_ATOMIC);
+ if (r)
+ return r;
+
+ job->vm_needs_flush |= needs_flush;
+ return 0;
+ }
+
+ *id = NULL;
+ return 0;
+}
+
+/**
+ * amdgpu_vmid_grab - allocate the next free VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @job: job who wants to use the VMID
+ * @fence: fence to wait for if no id could be grabbed
+ *
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ */
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_job *job, struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *idle = NULL;
+ struct amdgpu_vmid *id = NULL;
+ int r = 0;
+
+ mutex_lock(&id_mgr->lock);
+ r = amdgpu_vmid_grab_idle(ring, &idle, fence);
+ if (r || !idle)
+ goto error;
+
+ if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
+ r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
+ if (r || !id)
+ goto error;
+ } else {
+ r = amdgpu_vmid_grab_used(vm, ring, job, &id);
+ if (r)
+ goto error;
+
+ if (!id) {
+ /* Still no ID to use? Then use the idle one found earlier */
+ id = idle;
+
+ /* Remember this submission as user of the VMID */
+ r = amdgpu_sync_fence(&id->active,
+ &job->base.s_fence->finished,
+ GFP_ATOMIC);
+ if (r)
+ goto error;
+
+ job->vm_needs_flush = true;
+ }
+
+ list_move_tail(&id->list, &id_mgr->ids_lru);
+ }
+
+ job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
+ if (job->vm_needs_flush) {
+ id->flushed_updates = amdgpu_vm_tlb_seq(vm);
+ dma_fence_put(id->last_flush);
+ id->last_flush = NULL;
+ }
+ job->vmid = id - id_mgr->ids;
+ job->pasid = vm->pasid;
+
+ id->gds_base = job->gds_base;
+ id->gds_size = job->gds_size;
+ id->gws_base = job->gws_base;
+ id->gws_size = job->gws_size;
+ id->oa_base = job->oa_base;
+ id->oa_size = job->oa_size;
+ id->pd_gpu_addr = job->vm_pd_addr;
+ id->owner = vm->immediate.fence_context;
+
+ trace_amdgpu_vm_grab_id(vm, ring, job);
+
+error:
+ mutex_unlock(&id_mgr->lock);
+ return r;
+}
+
+/*
+ * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
+ * @vm: the VM to check
+ * @vmhub: the VMHUB which will be used
+ *
+ * Returns: True if the VM will use a reserved VMID.
+ */
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
+{
+ return vm->reserved_vmid[vmhub];
+}
+
+/*
+ * amdgpu_vmid_alloc_reserved - reserve a specific VMID for this vm
+ * @adev: amdgpu device structure
+ * @vm: the VM to reserve an ID for
+ * @vmhub: the VMHUB which should be used
+ *
+ * Mostly used to have a reserved VMID for debugging and SPM.
+ *
+ * Returns: 0 for success, -ENOENT if an ID is already reserved.
+ */
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned vmhub)
+{
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id;
+ int r = 0;
+
+ mutex_lock(&id_mgr->lock);
+ if (vm->reserved_vmid[vmhub])
+ goto unlock;
+ if (id_mgr->reserved_vmid) {
+ r = -ENOENT;
+ goto unlock;
+ }
+ /* Remove from normal round robin handling */
+ id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
+ list_del_init(&id->list);
+ vm->reserved_vmid[vmhub] = id;
+ id_mgr->reserved_vmid = true;
+ mutex_unlock(&id_mgr->lock);
+
+ return 0;
+unlock:
+ mutex_unlock(&id_mgr->lock);
+ return r;
+}
+
+/*
+ * amdgpu_vmid_free_reserved - free up a reserved VMID again
+ * @adev: amdgpu device structure
+ * @vm: the VM with the reserved ID
+ * @vmhub: the VMHUB which should be used
+ */
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned vmhub)
+{
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+ mutex_lock(&id_mgr->lock);
+ if (vm->reserved_vmid[vmhub]) {
+ list_add(&vm->reserved_vmid[vmhub]->list,
+ &id_mgr->ids_lru);
+ vm->reserved_vmid[vmhub] = NULL;
+ id_mgr->reserved_vmid = false;
+ }
+ mutex_unlock(&id_mgr->lock);
+}
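
/*
 * Usage sketch (illustrative, not part of this patch): reserve a VMID
 * for a VM that needs a stable ID, e.g. for SPM or debugging, and
 * release it again when done.  The vmhub index is taken from the ring
 * that will use the VM.
 */
static int amdgpu_vmid_example_reserve(struct amdgpu_device *adev,
                                       struct amdgpu_vm *vm,
                                       struct amdgpu_ring *ring)
{
        int r;

        r = amdgpu_vmid_alloc_reserved(adev, vm, ring->vm_hub);
        if (r)
                return r;       /* -ENOENT if the hub's reserved VMID is taken */

        /* submissions from @vm on this hub now use the reserved VMID */

        amdgpu_vmid_free_reserved(adev, vm, ring->vm_hub);
        return 0;
}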
+
+/**
+ * amdgpu_vmid_reset - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ * @vmhub: vmhub type
+ * @vmid: vmid number to use
+ *
+ * Reset saved GDS, GWS and OA to force a switch on the next flush.
+ */
+void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
+ unsigned vmid)
+{
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id = &id_mgr->ids[vmid];
+
+ mutex_lock(&id_mgr->lock);
+ id->owner = 0;
+ id->gds_base = 0;
+ id->gds_size = 0;
+ id->gws_base = 0;
+ id->gws_size = 0;
+ id->oa_base = 0;
+ id->oa_size = 0;
+ mutex_unlock(&id_mgr->lock);
+}
+
+/**
+ * amdgpu_vmid_reset_all - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ *
+ * Reset VMID to force flush on next use
+ */
+void amdgpu_vmid_reset_all(struct amdgpu_device *adev)
+{
+ unsigned i, j;
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+ struct amdgpu_vmid_mgr *id_mgr =
+ &adev->vm_manager.id_mgr[i];
+
+ for (j = 1; j < id_mgr->num_ids; ++j)
+ amdgpu_vmid_reset(adev, i, j);
+ }
+}
+
+/**
+ * amdgpu_vmid_mgr_init - init the VMID manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the VM manager structures
+ */
+void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
+{
+ unsigned i, j;
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+ struct amdgpu_vmid_mgr *id_mgr =
+ &adev->vm_manager.id_mgr[i];
+
+ mutex_init(&id_mgr->lock);
+ INIT_LIST_HEAD(&id_mgr->ids_lru);
+
+ /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0))
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ else if (AMDGPU_IS_MMHUB0(i) ||
+ AMDGPU_IS_MMHUB1(i))
+ id_mgr->num_ids = 16;
+ else
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+
+ /* skip over VMID 0, since it is the system VM */
+ for (j = 1; j < id_mgr->num_ids; ++j) {
+ amdgpu_vmid_reset(adev, i, j);
+ amdgpu_sync_create(&id_mgr->ids[j].active);
+ list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
+ }
+ }
+}
+
+/**
+ * amdgpu_vmid_mgr_fini - cleanup VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cleanup the VM manager and free resources.
+ */
+void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
+{
+ unsigned i, j;
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
+ struct amdgpu_vmid_mgr *id_mgr =
+ &adev->vm_manager.id_mgr[i];
+
+ mutex_destroy(&id_mgr->lock);
+ for (j = 0; j < AMDGPU_NUM_VMID; ++j) {
+ struct amdgpu_vmid *id = &id_mgr->ids[j];
+
+ amdgpu_sync_free(&id->active);
+ dma_fence_put(id->last_flush);
+ dma_fence_put(id->pasid_mapping);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
new file mode 100644
index 000000000000..b3649cd3af56
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_IDS_H__
+#define __AMDGPU_IDS_H__
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/dma-fence.h>
+
+#include "amdgpu_sync.h"
+
+/* maximum number of VMIDs */
+#define AMDGPU_NUM_VMID 16
+
+struct amdgpu_device;
+struct amdgpu_vm;
+struct amdgpu_ring;
+struct amdgpu_sync;
+struct amdgpu_job;
+
+struct amdgpu_vmid {
+ struct list_head list;
+ struct amdgpu_sync active;
+ struct dma_fence *last_flush;
+ uint64_t owner;
+
+ uint64_t pd_gpu_addr;
+ /* last flushed PD/PT update */
+ uint64_t flushed_updates;
+
+ uint32_t current_gpu_reset_count;
+
+ uint32_t gds_base;
+ uint32_t gds_size;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint32_t oa_base;
+ uint32_t oa_size;
+
+ unsigned pasid;
+ struct dma_fence *pasid_mapping;
+};
+
+struct amdgpu_vmid_mgr {
+ struct mutex lock;
+ unsigned num_ids;
+ struct list_head ids_lru;
+ struct amdgpu_vmid ids[AMDGPU_NUM_VMID];
+ bool reserved_vmid;
+};
+
+int amdgpu_pasid_alloc(unsigned int bits);
+void amdgpu_pasid_free(u32 pasid);
+void amdgpu_pasid_free_delayed(struct dma_resv *resv,
+ u32 pasid);
+
+bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
+ struct amdgpu_vmid *id);
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
+int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned vmhub);
+void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned vmhub);
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_job *job, struct dma_fence **fence);
+void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
+ unsigned vmid);
+void amdgpu_vmid_reset_all(struct amdgpu_device *adev);
+
+void amdgpu_vmid_mgr_init(struct amdgpu_device *adev);
+void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
new file mode 100644
index 000000000000..a6419246e9c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/dma-mapping.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_reset.h"
+
+/**
+ * amdgpu_ih_ring_init - initialize the IH state
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to initialize
+ * @ring_size: ring size to allocate
+ * @use_bus_addr: true when we can use dma_alloc_coherent
+ *
+ * Initializes the IH state and allocates a buffer
+ * for the IH ring buffer.
+ * Returns 0 for success, errors for failure.
+ */
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ unsigned ring_size, bool use_bus_addr)
+{
+ u32 rb_bufsz;
+ int r;
+
+ /* Align ring size */
+ rb_bufsz = order_base_2(ring_size / 4);
+ ring_size = (1 << rb_bufsz) * 4;
+ ih->ring_size = ring_size;
+ ih->ptr_mask = ih->ring_size - 1;
+ ih->rptr = 0;
+ ih->use_bus_addr = use_bus_addr;
+
+ if (use_bus_addr) {
+ dma_addr_t dma_addr;
+
+ if (ih->ring)
+ return 0;
+
+ /* add 8 bytes for the rptr/wptr shadows and
+ * add them to the end of the ring allocation.
+ */
+ ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
+ &dma_addr, GFP_KERNEL);
+ if (ih->ring == NULL)
+ return -ENOMEM;
+
+ ih->gpu_addr = dma_addr;
+ ih->wptr_addr = dma_addr + ih->ring_size;
+ ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
+ ih->rptr_addr = dma_addr + ih->ring_size + 4;
+ ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
+ } else {
+ unsigned wptr_offs, rptr_offs;
+
+ r = amdgpu_device_wb_get(adev, &wptr_offs);
+ if (r)
+ return r;
+
+ r = amdgpu_device_wb_get(adev, &rptr_offs);
+ if (r) {
+ amdgpu_device_wb_free(adev, wptr_offs);
+ return r;
+ }
+
+ r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ih->ring_obj, &ih->gpu_addr,
+ (void **)&ih->ring);
+ if (r) {
+ amdgpu_device_wb_free(adev, rptr_offs);
+ amdgpu_device_wb_free(adev, wptr_offs);
+ return r;
+ }
+
+ ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
+ ih->wptr_cpu = &adev->wb.wb[wptr_offs];
+ ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
+ ih->rptr_cpu = &adev->wb.wb[rptr_offs];
+ }
+
+ init_waitqueue_head(&ih->wait_process);
+ return 0;
+}
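
/*
 * Minimal sketch (illustrative, not part of this patch): allocate the
 * main IH ring in coherent system memory during IP init and tear it
 * down again.  IH_RING_SIZE comes from amdgpu_ih.h; the calling context
 * is an assumption made for the example only.
 */
static int amdgpu_ih_example_init(struct amdgpu_device *adev)
{
        int r;

        /* use_bus_addr = true: ring lives in dma_alloc_coherent() memory */
        r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true);
        if (r)
                return r;

        /* ... hook up the hardware specific ih_funcs and enable the ring ... */

        amdgpu_ih_ring_fini(adev, &adev->irq.ih);
        return 0;
}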
+
+/**
+ * amdgpu_ih_ring_fini - tear down the IH state
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to tear down
+ *
+ * Tears down the IH state and frees buffer
+ * used for the IH ring buffer.
+ */
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+
+ if (!ih->ring)
+ return;
+
+ if (ih->use_bus_addr) {
+
+ /* add 8 bytes for the rptr/wptr shadows and
+ * add them to the end of the ring allocation.
+ */
+ dma_free_coherent(adev->dev, ih->ring_size + 8,
+ (void *)ih->ring, ih->gpu_addr);
+ ih->ring = NULL;
+ } else {
+ amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
+ (void **)&ih->ring);
+ amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
+ amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
+ }
+}
+
+/**
+ * amdgpu_ih_ring_write - write IV to the ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to write to
+ * @iv: the iv to write
+ * @num_dw: size of the iv in dw
+ *
+ * Writes an IV to the ring buffer using the CPU and increments the wptr.
+ * Used for testing and delegating IVs to a software ring.
+ */
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw)
+{
+ uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
+ unsigned int i;
+
+ for (i = 0; i < num_dw; ++i)
+ ih->ring[wptr++] = cpu_to_le32(iv[i]);
+
+ wptr <<= 2;
+ wptr &= ih->ptr_mask;
+
+ /* Only commit the new wptr if we don't overflow */
+ if (wptr != READ_ONCE(ih->rptr)) {
+ wmb();
+ WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
+ } else if (adev->irq.retry_cam_enabled) {
+ dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
+ wptr, ih->rptr);
+ }
+}
+
+/**
+ * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ *
+ * Used to ensure the ring has processed IVs up to the checkpoint write pointer.
+ */
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ uint32_t checkpoint_wptr;
+ uint64_t checkpoint_ts;
+ long timeout = HZ;
+
+ if (!ih->enabled || adev->shutdown)
+ return -ENODEV;
+
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ /* Order wptr with ring data. */
+ rmb();
+ checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+
+ return wait_event_interruptible_timeout(ih->wait_process,
+ amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
+ ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
+}
+
+/**
+ * amdgpu_ih_process - interrupt handler
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ *
+ * Interrupt handler (VI), walks the IH ring.
+ * Returns the IRQ processing return code.
+ */
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+ unsigned int count;
+ u32 wptr;
+
+ if (!ih->enabled || adev->shutdown)
+ return IRQ_NONE;
+
+ wptr = amdgpu_ih_get_wptr(adev, ih);
+
+restart_ih:
+ count = AMDGPU_IH_MAX_NUM_IVS;
+ dev_dbg(adev->dev, "%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
+
+ /* Order reading of wptr vs. reading of IH ring data */
+ rmb();
+
+ while (ih->rptr != wptr && --count) {
+ amdgpu_irq_dispatch(adev, ih);
+ ih->rptr &= ih->ptr_mask;
+ }
+
+ if (!ih->overflow)
+ amdgpu_ih_set_rptr(adev, ih);
+
+ wake_up_all(&ih->wait_process);
+
+ /* make sure wptr hasn't changed while processing */
+ wptr = amdgpu_ih_get_wptr(adev, ih);
+ if (wptr != ih->rptr)
+ if (!ih->overflow)
+ goto restart_ih;
+
+ if (ih->overflow)
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * amdgpu_ih_decode_iv_helper - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ * @entry: IV entry
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advances the position for Vega10
+ * and later GPUs.
+ */
+void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[8];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+ dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
+ dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
+ dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
+ dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
+
+ entry->client_id = dw[0] & 0xff;
+ entry->src_id = (dw[0] >> 8) & 0xff;
+ entry->ring_id = (dw[0] >> 16) & 0xff;
+ entry->vmid = (dw[0] >> 24) & 0xf;
+ entry->vmid_src = (dw[0] >> 31);
+ entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
+ entry->timestamp_src = dw[2] >> 31;
+ entry->pasid = dw[3] & 0xffff;
+ entry->node_id = (dw[3] >> 16) & 0xff;
+ entry->src_data[0] = dw[4];
+ entry->src_data[1] = dw[5];
+ entry->src_data[2] = dw[6];
+ entry->src_data[3] = dw[7];
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 32;
+}
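
/*
 * Packing sketch (illustrative, not part of this patch): build an IV
 * with the layout that amdgpu_ih_decode_iv_helper() expects and push it
 * into the software ring.  The client/source IDs and the payload value
 * are placeholders.
 */
static void amdgpu_ih_example_forward(struct amdgpu_device *adev,
                                      uint32_t client_id, uint32_t src_id)
{
        uint32_t iv[8] = { };

        iv[0] = (client_id & 0xff) |            /* dw0: client_id */
                ((src_id & 0xff) << 8);         /* dw0: src_id, rest left 0 */
        iv[1] = 0;                              /* timestamp, low 32 bits */
        iv[2] = 0;                              /* timestamp high bits + source */
        iv[3] = 0;                              /* pasid / node_id */
        iv[4] = 0xdeadbeef;                     /* src_data[0], placeholder */

        amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, iv, ARRAY_SIZE(iv));
}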
+
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset)
+{
+ uint32_t iv_size = 32;
+ uint32_t ring_index;
+ uint32_t dw1, dw2;
+
+ rptr += iv_size * offset;
+ ring_index = (rptr & ih->ptr_mask) >> 2;
+
+ dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
+ return dw1 | ((u64)(dw2 & 0xffff) << 32);
+}
+
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+ return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
+ ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
new file mode 100644
index 000000000000..f58b6be7fccc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IH_H__
+#define __AMDGPU_IH_H__
+
+/* Maximum number of IVs processed at once */
+#define AMDGPU_IH_MAX_NUM_IVS 32
+
+#define IH_RING_SIZE (256 * 1024)
+#define IH_SW_RING_SIZE (16 * 1024) /* enough for 512 CAM entries */
+
+struct amdgpu_device;
+struct amdgpu_iv_entry;
+
+struct amdgpu_ih_regs {
+ uint32_t ih_rb_base;
+ uint32_t ih_rb_base_hi;
+ uint32_t ih_rb_cntl;
+ uint32_t ih_rb_wptr;
+ uint32_t ih_rb_rptr;
+ uint32_t ih_doorbell_rptr;
+ uint32_t ih_rb_wptr_addr_lo;
+ uint32_t ih_rb_wptr_addr_hi;
+ uint32_t psp_reg_id;
+};
+
+/*
+ * R6xx+ IH ring
+ */
+struct amdgpu_ih_ring {
+ unsigned ring_size;
+ uint32_t ptr_mask;
+ u32 doorbell_index;
+ bool use_doorbell;
+ bool use_bus_addr;
+
+ struct amdgpu_bo *ring_obj;
+ uint32_t *ring;
+ uint64_t gpu_addr;
+
+ uint64_t wptr_addr;
+ uint32_t *wptr_cpu;
+
+ uint64_t rptr_addr;
+ uint32_t *rptr_cpu;
+
+ bool enabled;
+ unsigned rptr;
+ struct amdgpu_ih_regs ih_regs;
+
+ /* For waiting on IH processing at checkpoint. */
+ wait_queue_head_t wait_process;
+ uint64_t processed_timestamp;
+ bool overflow;
+};
+
+/* return true if time stamp t2 is after t1 with 48bit wrap around */
+#define amdgpu_ih_ts_after(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL)
+
+#define amdgpu_ih_ts_after_or_equal(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) >= 0LL)
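
/*
 * Worked example (illustrative, not from this patch): with
 * t1 = 0xFFFFFFFFFFFF (just before the 48 bit counter wraps) and
 * t2 = 0x1 (just after the wrap), the shifted difference
 * (t2 << 16) - (t1 << 16) evaluates to 0x20000 as a signed 64 bit
 * value, so amdgpu_ih_ts_after(t1, t2) is true even though t2 < t1
 * numerically.
 */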
+
+/* provided by the ih block */
+struct amdgpu_ih_funcs {
+ /* ring read/write ptr handling, called from interrupt context */
+ u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+ void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry);
+ uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
+ void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+};
+
+#define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih))
+#define amdgpu_ih_decode_iv(adev, iv) \
+ (adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
+#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \
+ (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \
+ (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr), (offset)))
+#define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih))
+
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ unsigned ring_size, bool use_bus_addr);
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ const uint32_t *iv, unsigned int num_dw);
+int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih);
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry);
+uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
+ signed int offset);
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
new file mode 100644
index 000000000000..484e936812e4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IMU_H__
+#define __AMDGPU_IMU_H__
+
+enum imu_work_mode {
+ DEBUG_MODE,
+ MISSION_MODE
+};
+
+struct amdgpu_imu_funcs {
+ int (*init_microcode)(struct amdgpu_device *adev);
+ int (*load_microcode)(struct amdgpu_device *adev);
+ void (*setup_imu)(struct amdgpu_device *adev);
+ int (*start_imu)(struct amdgpu_device *adev);
+ void (*program_rlc_ram)(struct amdgpu_device *adev);
+ int (*wait_for_reset_status)(struct amdgpu_device *adev);
+};
+
+struct imu_rlc_ram_golden {
+ u32 hwip;
+ u32 instance;
+ u32 segment;
+ u32 reg;
+ u32 data;
+ u32 addr_mask;
+};
+
+#define IMU_RLC_RAM_GOLDEN_VALUE(ip, inst, reg, data, addr_mask) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, data, addr_mask }
+
+struct amdgpu_imu {
+ const struct amdgpu_imu_funcs *funcs;
+ enum imu_work_mode mode;
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
new file mode 100644
index 000000000000..a1cbd7c3deb2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
@@ -0,0 +1,45 @@
+/*
+ * \file amdgpu_ioc32.c
+ *
+ * 32-bit ioctl compatibility routines for the AMDGPU DRM.
+ *
+ * \author Paul Mackerras <paulus@samba.org>
+ *
+ * Copyright (C) Paul Mackerras 2005
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include <linux/compat.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_ioctl.h>
+
+#include "amdgpu_drv.h"
+
+long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ unsigned int nr = DRM_IOCTL_NR(cmd);
+
+ if (nr < DRM_COMMAND_BASE)
+ return drm_compat_ioctl(filp, cmd, arg);
+
+ return amdgpu_drm_ioctl(filp, cmd, arg);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
new file mode 100644
index 000000000000..99e1cf4fc955
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ip.h"
+
+static int8_t amdgpu_logical_to_dev_inst(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ int8_t inst)
+{
+ int8_t dev_inst;
+
+ switch (block) {
+ case GC_HWIP:
+ case SDMA0_HWIP:
+ /* Both JPEG and VCN, as JPEG is only an alias of VCN */
+ case VCN_HWIP:
+ dev_inst = adev->ip_map.dev_inst[block][inst];
+ break;
+ default:
+ /* For the rest of the IPs, no lookup is required.
+ * Assume 'logical instance == physical instance' for all configs. */
+ dev_inst = inst;
+ break;
+ }
+
+ return dev_inst;
+}
+
+static uint32_t amdgpu_logical_to_dev_mask(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type block,
+ uint32_t mask)
+{
+ uint32_t dev_mask = 0;
+ int8_t log_inst, dev_inst;
+
+ while (mask) {
+ log_inst = ffs(mask) - 1;
+ dev_inst = amdgpu_logical_to_dev_inst(adev, block, log_inst);
+ dev_mask |= (1 << dev_inst);
+ mask &= ~(1 << log_inst);
+ }
+
+ return dev_mask;
+}
+
+static void amdgpu_populate_ip_map(struct amdgpu_device *adev,
+ enum amd_hw_ip_block_type ip_block,
+ uint32_t inst_mask)
+{
+ int l = 0, i;
+
+ while (inst_mask) {
+ i = ffs(inst_mask) - 1;
+ adev->ip_map.dev_inst[ip_block][l++] = i;
+ inst_mask &= ~(1 << i);
+ }
+ for (; l < HWIP_MAX_INSTANCE; l++)
+ adev->ip_map.dev_inst[ip_block][l] = -1;
+}
+
+void amdgpu_ip_map_init(struct amdgpu_device *adev)
+{
+ u32 ip_map[][2] = {
+ { GC_HWIP, adev->gfx.xcc_mask },
+ { SDMA0_HWIP, adev->sdma.sdma_mask },
+ { VCN_HWIP, adev->vcn.inst_mask },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ip_map); ++i)
+ amdgpu_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]);
+
+ adev->ip_map.logical_to_dev_inst = amdgpu_logical_to_dev_inst;
+ adev->ip_map.logical_to_dev_mask = amdgpu_logical_to_dev_mask;
+}
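
/*
 * Worked example (illustrative, not from this patch): with an SDMA
 * instance mask of 0b1010 (physical instances 1 and 3 present),
 * amdgpu_populate_ip_map() records dev_inst[SDMA0_HWIP] = { 1, 3, -1, ... },
 * so logical instance 0 maps to physical instance 1 and logical
 * instance 1 maps to physical instance 3.  amdgpu_logical_to_dev_mask()
 * then translates a logical mask of 0b11 into the physical mask 0b1010.
 */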
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
new file mode 100644
index 000000000000..2490fd322aec
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ip.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IP_H__
+#define __AMDGPU_IP_H__
+
+void amdgpu_ip_map_init(struct amdgpu_device *adev);
+
+#endif /* __AMDGPU_IP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
new file mode 100644
index 000000000000..8112ffc85995
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -0,0 +1,780 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+/**
+ * DOC: Interrupt Handling
+ *
+ * Interrupts generated within GPU hardware raise interrupt requests that are
+ * passed to the amdgpu IRQ handler, which is responsible for detecting the
+ * source and type of the interrupt and for dispatching matching handlers. If
+ * handling an interrupt requires calling kernel functions that may sleep,
+ * processing is dispatched to work handlers.
+ *
+ * If MSI functionality is not disabled by module parameter then MSI
+ * support will be enabled.
+ *
+ * For GPU interrupt sources that may be driven by another driver, IRQ domain
+ * support is used (with mapping between virtual and hardware IRQs).
+ */
+
+#include <linux/irq.h>
+#include <linux/pci.h>
+
+#include <drm/drm_vblank.h>
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "atom.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
+
+#include <linux/pm_runtime.h>
+
+#ifdef CONFIG_DRM_AMD_DC
+#include "amdgpu_dm_irq.h"
+#endif
+
+#define AMDGPU_WAIT_IDLE_TIMEOUT 200
+
+const char *soc15_ih_clientid_name[] = {
+ "IH",
+ "SDMA2 or ACP",
+ "ATHUB",
+ "BIF",
+ "SDMA3 or DCE",
+ "SDMA4 or ISP",
+ "VMC1 or PCIE0",
+ "RLC",
+ "SDMA0",
+ "SDMA1",
+ "SE0SH",
+ "SE1SH",
+ "SE2SH",
+ "SE3SH",
+ "VCN1 or UVD1",
+ "THM",
+ "VCN or UVD",
+ "SDMA5 or VCE0",
+ "VMC",
+ "SDMA6 or XDMA",
+ "GRBM_CP",
+ "ATS",
+ "ROM_SMUIO",
+ "DF",
+ "SDMA7 or VCE1",
+ "PWR",
+ "reserved",
+ "UTCL2",
+ "EA",
+ "UTCL2LOG",
+ "MP0",
+ "MP1"
+};
+
+const int node_id_to_phys_map[NODEID_MAX] = {
+ [AID0_NODEID] = 0,
+ [XCD0_NODEID] = 0,
+ [XCD1_NODEID] = 1,
+ [AID1_NODEID] = 1,
+ [XCD2_NODEID] = 2,
+ [XCD3_NODEID] = 3,
+ [AID2_NODEID] = 2,
+ [XCD4_NODEID] = 4,
+ [XCD5_NODEID] = 5,
+ [AID3_NODEID] = 3,
+ [XCD6_NODEID] = 6,
+ [XCD7_NODEID] = 7,
+};
+
+/**
+ * amdgpu_irq_disable_all - disable *all* interrupts
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Disable all types of interrupts from all sources.
+ */
+void amdgpu_irq_disable_all(struct amdgpu_device *adev)
+{
+ unsigned long irqflags;
+ unsigned int i, j, k;
+ int r;
+
+ spin_lock_irqsave(&adev->irq.lock, irqflags);
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
+ continue;
+
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src || !src->funcs->set || !src->num_types)
+ continue;
+
+ for (k = 0; k < src->num_types; ++k) {
+ r = src->funcs->set(adev, src, k,
+ AMDGPU_IRQ_STATE_DISABLE);
+ if (r)
+ dev_err(adev->dev,
+ "error disabling interrupt (%d)\n",
+ r);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&adev->irq.lock, irqflags);
+}
+
+/**
+ * amdgpu_irq_handler - IRQ handler
+ *
+ * @irq: IRQ number (unused)
+ * @arg: pointer to DRM device
+ *
+ * IRQ handler for amdgpu driver (all ASICs).
+ *
+ * Returns:
+ * result of handling the IRQ, as defined by &irqreturn_t
+ */
+static irqreturn_t amdgpu_irq_handler(int irq, void *arg)
+{
+ struct drm_device *dev = (struct drm_device *) arg;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ irqreturn_t ret;
+
+ ret = amdgpu_ih_process(adev, &adev->irq.ih);
+ if (ret == IRQ_HANDLED)
+ pm_runtime_mark_last_busy(dev->dev);
+
+ amdgpu_ras_interrupt_fatal_error_handler(adev);
+
+ return ret;
+}
+
+/**
+ * amdgpu_irq_handle_ih1 - kick off processing for IH1
+ *
+ * @work: work structure in struct amdgpu_irq
+ *
+ * Kick off processing of IH ring 1.
+ */
+static void amdgpu_irq_handle_ih1(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ irq.ih1_work);
+
+ amdgpu_ih_process(adev, &adev->irq.ih1);
+}
+
+/**
+ * amdgpu_irq_handle_ih2 - kick off processing for IH2
+ *
+ * @work: work structure in struct amdgpu_irq
+ *
+ * Kick off processing of IH ring 2.
+ */
+static void amdgpu_irq_handle_ih2(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ irq.ih2_work);
+
+ amdgpu_ih_process(adev, &adev->irq.ih2);
+}
+
+/**
+ * amdgpu_irq_handle_ih_soft - kick off processing for ih_soft
+ *
+ * @work: work structure in struct amdgpu_irq
+ *
+ * Kick off processing of the IH soft ring.
+ */
+static void amdgpu_irq_handle_ih_soft(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ irq.ih_soft_work);
+
+ amdgpu_ih_process(adev, &adev->irq.ih_soft);
+}
+
+/**
+ * amdgpu_msi_ok - check whether MSI functionality is enabled
+ *
+ * @adev: amdgpu device pointer (unused)
+ *
+ * Checks whether MSI functionality has been disabled via module parameter
+ * (all ASICs).
+ *
+ * Returns:
+ * *true* if MSIs are allowed to be enabled or *false* otherwise
+ */
+static bool amdgpu_msi_ok(struct amdgpu_device *adev)
+{
+ if (amdgpu_msi == 1)
+ return true;
+ else if (amdgpu_msi == 0)
+ return false;
+
+ return true;
+}
+
+void amdgpu_restore_msix(struct amdgpu_device *adev)
+{
+ u16 ctrl;
+
+ pci_read_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
+ if (!(ctrl & PCI_MSIX_FLAGS_ENABLE))
+ return;
+
+ /* VF FLR */
+ ctrl &= ~PCI_MSIX_FLAGS_ENABLE;
+ pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+ ctrl |= PCI_MSIX_FLAGS_ENABLE;
+ pci_write_config_word(adev->pdev, adev->pdev->msix_cap + PCI_MSIX_FLAGS, ctrl);
+}
+
+/**
+ * amdgpu_irq_init - initialize interrupt handling
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Initializes the IRQ lock, enables MSI if not disabled by module parameter,
+ * sets up the work functions for the IH rings and installs the IRQ handler.
+ *
+ * Returns:
+ * 0 on success or error code on failure
+ */
+int amdgpu_irq_init(struct amdgpu_device *adev)
+{
+ unsigned int irq, flags;
+ int r;
+
+ spin_lock_init(&adev->irq.lock);
+
+ /* Enable MSI if not disabled by module parameter */
+ adev->irq.msi_enabled = false;
+
+ if (!amdgpu_msi_ok(adev))
+ flags = PCI_IRQ_INTX;
+ else
+ flags = PCI_IRQ_ALL_TYPES;
+
+ /* we only need one vector */
+ r = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags);
+ if (r < 0) {
+ dev_err(adev->dev, "Failed to alloc msi vectors\n");
+ return r;
+ }
+
+ if (amdgpu_msi_ok(adev)) {
+ adev->irq.msi_enabled = true;
+ dev_dbg(adev->dev, "using MSI/MSI-X.\n");
+ }
+
+ INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);
+ INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
+ INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft);
+
+ /* Use vector 0 for MSI-X. */
+ r = pci_irq_vector(adev->pdev, 0);
+ if (r < 0)
+ goto free_vectors;
+ irq = r;
+
+ /* PCI devices require shared interrupts. */
+ r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name,
+ adev_to_drm(adev));
+ if (r)
+ goto free_vectors;
+
+ adev->irq.installed = true;
+ adev->irq.irq = irq;
+ adev_to_drm(adev)->max_vblank_count = 0x00ffffff;
+
+ dev_dbg(adev->dev, "amdgpu: irq initialized.\n");
+ return 0;
+
+free_vectors:
+ if (adev->irq.msi_enabled)
+ pci_free_irq_vectors(adev->pdev);
+
+ adev->irq.msi_enabled = false;
+ return r;
+}
+
+void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
+{
+ if (adev->irq.installed) {
+ free_irq(adev->irq.irq, adev_to_drm(adev));
+ adev->irq.installed = false;
+ if (adev->irq.msi_enabled)
+ pci_free_irq_vectors(adev->pdev);
+ }
+
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
+}
+
+/**
+ * amdgpu_irq_fini_sw - shut down interrupt handling
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Frees the per-client IRQ source arrays and the per-type enable counters
+ * allocated by amdgpu_irq_add_id() (all ASICs).
+ */
+void amdgpu_irq_fini_sw(struct amdgpu_device *adev)
+{
+ unsigned int i, j;
+
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
+ continue;
+
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src)
+ continue;
+
+ kfree(src->enabled_types);
+ src->enabled_types = NULL;
+ }
+ kfree(adev->irq.client[i].sources);
+ adev->irq.client[i].sources = NULL;
+ }
+}
+
+/**
+ * amdgpu_irq_add_id - register IRQ source
+ *
+ * @adev: amdgpu device pointer
+ * @client_id: client id
+ * @src_id: source id
+ * @source: IRQ source pointer
+ *
+ * Registers IRQ source on a client.
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_add_id(struct amdgpu_device *adev,
+ unsigned int client_id, unsigned int src_id,
+ struct amdgpu_irq_src *source)
+{
+ if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
+ return -EINVAL;
+
+ if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
+ return -EINVAL;
+
+ if (!source->funcs)
+ return -EINVAL;
+
+ if (!adev->irq.client[client_id].sources) {
+ adev->irq.client[client_id].sources =
+ kcalloc(AMDGPU_MAX_IRQ_SRC_ID,
+ sizeof(struct amdgpu_irq_src *),
+ GFP_KERNEL);
+ if (!adev->irq.client[client_id].sources)
+ return -ENOMEM;
+ }
+
+ if (adev->irq.client[client_id].sources[src_id] != NULL)
+ return -EINVAL;
+
+ if (source->num_types && !source->enabled_types) {
+ atomic_t *types;
+
+ types = kcalloc(source->num_types, sizeof(atomic_t),
+ GFP_KERNEL);
+ if (!types)
+ return -ENOMEM;
+
+ source->enabled_types = types;
+ }
+
+ adev->irq.client[client_id].sources[src_id] = source;
+ return 0;
+}
+
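+/*
+ * Illustrative sketch (not part of the original change): a typical IP block
+ * registers its interrupt source at sw_init time and then balances
+ * amdgpu_irq_get()/amdgpu_irq_put() calls for the types it uses. The names
+ * my_irq, my_irq_funcs and MY_SRC_ID are hypothetical placeholders:
+ *
+ *     static const struct amdgpu_irq_src_funcs my_irq_funcs = {
+ *             .set = my_irq_set,
+ *             .process = my_irq_process,
+ *     };
+ *
+ *     my_irq.num_types = 1;
+ *     my_irq.funcs = &my_irq_funcs;
+ *     r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, MY_SRC_ID,
+ *                           &my_irq);
+ *
+ *     r = amdgpu_irq_get(adev, &my_irq, 0);
+ *     ...
+ *     r = amdgpu_irq_put(adev, &my_irq, 0);
+ */
+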
+/**
+ * amdgpu_irq_dispatch - dispatch IRQ to IP blocks
+ *
+ * @adev: amdgpu device pointer
+ * @ih: interrupt ring instance
+ *
+ * Dispatches IRQ to IP blocks.
+ */
+void amdgpu_irq_dispatch(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 ring_index = ih->rptr >> 2;
+ struct amdgpu_iv_entry entry;
+ unsigned int client_id, src_id;
+ struct amdgpu_irq_src *src;
+ bool handled = false;
+ int r;
+
+ entry.ih = ih;
+ entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
+
+ /*
+ * timestamp is not supported on some legacy SOCs (cik, cz, iceland,
+ * si and tonga), so initialize timestamp and timestamp_src to 0
+ */
+ entry.timestamp = 0;
+ entry.timestamp_src = 0;
+
+ amdgpu_ih_decode_iv(adev, &entry);
+
+ trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
+
+ client_id = entry.client_id;
+ src_id = entry.src_id;
+
+ if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
+ dev_dbg(adev->dev, "Invalid client_id in IV: %d\n", client_id);
+
+ } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
+ dev_dbg(adev->dev, "Invalid src_id in IV: %d\n", src_id);
+
+ } else if (((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) ||
+ (client_id == SOC15_IH_CLIENTID_ISP)) &&
+ adev->irq.virq[src_id]) {
+ generic_handle_domain_irq(adev->irq.domain, src_id);
+
+ } else if (!adev->irq.client[client_id].sources) {
+ dev_dbg(adev->dev,
+ "Unregistered interrupt client_id: %d src_id: %d\n",
+ client_id, src_id);
+
+ } else if ((src = adev->irq.client[client_id].sources[src_id])) {
+ r = src->funcs->process(adev, src, &entry);
+ if (r < 0)
+ dev_err(adev->dev, "error processing interrupt (%d)\n",
+ r);
+ else if (r)
+ handled = true;
+
+ } else {
+ dev_dbg(adev->dev,
+ "Unregistered interrupt src_id: %d of client_id:%d\n",
+ src_id, client_id);
+ }
+
+ /* Send it to amdkfd as well if it isn't already handled */
+ if (!handled)
+ amdgpu_amdkfd_interrupt(adev, entry.iv_entry);
+
+ if (amdgpu_ih_ts_after(ih->processed_timestamp, entry.timestamp))
+ ih->processed_timestamp = entry.timestamp;
+}
+
+/**
+ * amdgpu_irq_delegate - delegate IV to soft IH ring
+ *
+ * @adev: amdgpu device pointer
+ * @entry: IV entry
+ * @num_dw: size of IV
+ *
+ * Delegate the IV to the soft IH ring and schedule processing of it. Used
+ * if the hardware delegation to IH1 or IH2 doesn't work for some reason.
+ */
+void amdgpu_irq_delegate(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry,
+ unsigned int num_dw)
+{
+ amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
+ schedule_work(&adev->irq.ih_soft_work);
+}
+
+/**
+ * amdgpu_irq_update - update hardware interrupt state
+ *
+ * @adev: amdgpu device pointer
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Updates the interrupt state for the specific source (all ASICs).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_update(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned int type)
+{
+ unsigned long irqflags;
+ enum amdgpu_interrupt_state state;
+ int r;
+
+ spin_lock_irqsave(&adev->irq.lock, irqflags);
+
+ /* We need to determine after taking the lock, otherwise
+ * we might disable just enabled interrupts again
+ */
+ if (amdgpu_irq_enabled(adev, src, type))
+ state = AMDGPU_IRQ_STATE_ENABLE;
+ else
+ state = AMDGPU_IRQ_STATE_DISABLE;
+
+ r = src->funcs->set(adev, src, type, state);
+ spin_unlock_irqrestore(&adev->irq.lock, irqflags);
+ return r;
+}
+
+/**
+ * amdgpu_irq_gpu_reset_resume_helper - update interrupt states on all sources
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Updates state of all types of interrupts on all sources on resume after
+ * reset.
+ */
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
+{
+ int i, j, k;
+
+ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
+ amdgpu_restore_msix(adev);
+
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
+ if (!adev->irq.client[i].sources)
+ continue;
+
+ for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) {
+ struct amdgpu_irq_src *src = adev->irq.client[i].sources[j];
+
+ if (!src || !src->funcs || !src->funcs->set)
+ continue;
+ for (k = 0; k < src->num_types; k++)
+ amdgpu_irq_update(adev, src, k);
+ }
+ }
+}
+
+/**
+ * amdgpu_irq_get - enable interrupt
+ *
+ * @adev: amdgpu device pointer
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Enables specified type of interrupt on the specified source (all ASICs).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned int type)
+{
+ if (!adev->irq.installed)
+ return -ENOENT;
+
+ if (type >= src->num_types)
+ return -EINVAL;
+
+ if (!src->enabled_types || !src->funcs->set)
+ return -EINVAL;
+
+ if (atomic_inc_return(&src->enabled_types[type]) == 1)
+ return amdgpu_irq_update(adev, src, type);
+
+ return 0;
+}
+
+/**
+ * amdgpu_irq_put - disable interrupt
+ *
+ * @adev: amdgpu device pointer
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Disables specified type of interrupt on the specified source (all ASICs).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned int type)
+{
+ /* When the threshold is reached, the interrupt source may not be enabled; return -EINVAL. */
+ if (amdgpu_ras_is_rma(adev) && !amdgpu_irq_enabled(adev, src, type))
+ return -EINVAL;
+
+ if (!adev->irq.installed)
+ return -ENOENT;
+
+ if (type >= src->num_types)
+ return -EINVAL;
+
+ if (!src->enabled_types || !src->funcs->set)
+ return -EINVAL;
+
+ if (WARN_ON(!amdgpu_irq_enabled(adev, src, type)))
+ return -EINVAL;
+
+ if (atomic_dec_and_test(&src->enabled_types[type]))
+ return amdgpu_irq_update(adev, src, type);
+
+ return 0;
+}
+
+/**
+ * amdgpu_irq_enabled - check whether interrupt is enabled or not
+ *
+ * @adev: amdgpu device pointer
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Checks whether the given type of interrupt is enabled on the given source.
+ *
+ * Returns:
+ * *true* if interrupt is enabled, *false* if interrupt is disabled or on
+ * invalid parameters
+ */
+bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned int type)
+{
+ if (!adev->irq.installed)
+ return false;
+
+ if (type >= src->num_types)
+ return false;
+
+ if (!src->enabled_types || !src->funcs->set)
+ return false;
+
+ return !!atomic_read(&src->enabled_types[type]);
+}
+
+/* XXX: Generic IRQ handling */
+static void amdgpu_irq_mask(struct irq_data *irqd)
+{
+ /* XXX */
+}
+
+static void amdgpu_irq_unmask(struct irq_data *irqd)
+{
+ /* XXX */
+}
+
+/* amdgpu hardware interrupt chip descriptor */
+static struct irq_chip amdgpu_irq_chip = {
+ .name = "amdgpu-ih",
+ .irq_mask = amdgpu_irq_mask,
+ .irq_unmask = amdgpu_irq_unmask,
+};
+
+/**
+ * amdgpu_irqdomain_map - create mapping between virtual and hardware IRQ numbers
+ *
+ * @d: amdgpu IRQ domain pointer (unused)
+ * @irq: virtual IRQ number
+ * @hwirq: hardware irq number
+ *
+ * The current implementation assigns a simple interrupt handler to the given
+ * virtual IRQ.
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+static int amdgpu_irqdomain_map(struct irq_domain *d,
+ unsigned int irq, irq_hw_number_t hwirq)
+{
+ if (hwirq >= AMDGPU_MAX_IRQ_SRC_ID)
+ return -EPERM;
+
+ irq_set_chip_and_handler(irq,
+ &amdgpu_irq_chip, handle_simple_irq);
+ return 0;
+}
+
+/* Implementation of methods for amdgpu IRQ domain */
+static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
+ .map = amdgpu_irqdomain_map,
+};
+
+/**
+ * amdgpu_irq_add_domain - create a linear IRQ domain
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Creates an IRQ domain for GPU interrupt sources
+ * that may be driven by another driver (e.g., ACP).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
+int amdgpu_irq_add_domain(struct amdgpu_device *adev)
+{
+ adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
+ &amdgpu_hw_irqdomain_ops, adev);
+ if (!adev->irq.domain) {
+ dev_err(adev->dev, "GPU irq add domain failed\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_irq_remove_domain - remove the IRQ domain
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Removes the IRQ domain for GPU interrupt sources
+ * that may be driven by another driver (e.g., ACP).
+ */
+void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
+{
+ if (adev->irq.domain) {
+ irq_domain_remove(adev->irq.domain);
+ adev->irq.domain = NULL;
+ }
+}
+
+/**
+ * amdgpu_irq_create_mapping - create mapping between a domain IRQ and a Linux IRQ
+ *
+ * @adev: amdgpu device pointer
+ * @src_id: IH source id
+ *
+ * Creates a mapping between a domain IRQ (GPU IH src id) and a Linux IRQ.
+ * Use this for components that generate a GPU interrupt, but are driven
+ * by a different driver (e.g., ACP).
+ *
+ * Returns:
+ * Linux IRQ
+ */
+unsigned int amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned int src_id)
+{
+ adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id);
+
+ return adev->irq.virq[src_id];
+}
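+
+/*
+ * Illustrative sketch (not part of the original change): a driver for a block
+ * such as ACP that raises a GPU interrupt but lives outside amdgpu could map
+ * its IH source id to a Linux IRQ and request it as usual. ACP_SRC_ID,
+ * acp_irq_handler and acp_data are hypothetical placeholders:
+ *
+ *     unsigned int virq = amdgpu_irq_create_mapping(adev, ACP_SRC_ID);
+ *
+ *     if (virq)
+ *             r = devm_request_irq(dev, virq, acp_irq_handler, 0,
+ *                                  "acp-irq", acp_data);
+ */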
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
new file mode 100644
index 000000000000..9f0417456abd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_IRQ_H__
+#define __AMDGPU_IRQ_H__
+
+#include <linux/irqdomain.h>
+#include "soc15_ih_clientid.h"
+#include "amdgpu_ih.h"
+
+#define AMDGPU_MAX_IRQ_SRC_ID 0x100
+#define AMDGPU_MAX_IRQ_CLIENT_ID 0x100
+
+#define AMDGPU_IRQ_CLIENTID_LEGACY 0
+#define AMDGPU_IRQ_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
+
+#define AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW 4
+
+struct amdgpu_device;
+
+enum amdgpu_interrupt_state {
+ AMDGPU_IRQ_STATE_DISABLE,
+ AMDGPU_IRQ_STATE_ENABLE,
+};
+
+struct amdgpu_iv_entry {
+ struct amdgpu_ih_ring *ih;
+ unsigned client_id;
+ unsigned src_id;
+ unsigned ring_id;
+ unsigned vmid;
+ unsigned vmid_src;
+ uint64_t timestamp;
+ unsigned timestamp_src;
+ unsigned pasid;
+ unsigned node_id;
+ unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
+ const uint32_t *iv_entry;
+};
+
+struct amdgpu_irq_src {
+ unsigned num_types;
+ atomic_t *enabled_types;
+ const struct amdgpu_irq_src_funcs *funcs;
+};
+
+struct amdgpu_irq_client {
+ struct amdgpu_irq_src **sources;
+};
+
+/* provided by interrupt generating IP blocks */
+struct amdgpu_irq_src_funcs {
+ int (*set)(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ unsigned type, enum amdgpu_interrupt_state state);
+
+ int (*process)(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+};
+
+struct amdgpu_irq {
+ bool installed;
+ unsigned int irq;
+ spinlock_t lock;
+ /* interrupt sources */
+ struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX];
+
+ /* status, etc. */
+ bool msi_enabled; /* msi enabled */
+
+ /* interrupt rings */
+ struct amdgpu_ih_ring ih, ih1, ih2, ih_soft;
+ const struct amdgpu_ih_funcs *ih_funcs;
+ struct work_struct ih1_work, ih2_work, ih_soft_work;
+ struct amdgpu_irq_src self_irq;
+
+ /* gen irq stuff */
+ struct irq_domain *domain; /* GPU irq controller domain */
+ unsigned virq[AMDGPU_MAX_IRQ_SRC_ID];
+ uint32_t srbm_soft_reset;
+ u32 retry_cam_doorbell_index;
+ bool retry_cam_enabled;
+};
+
+enum interrupt_node_id_per_aid {
+ AID0_NODEID = 0,
+ XCD0_NODEID = 1,
+ XCD1_NODEID = 2,
+ AID1_NODEID = 4,
+ XCD2_NODEID = 5,
+ XCD3_NODEID = 6,
+ AID2_NODEID = 8,
+ XCD4_NODEID = 9,
+ XCD5_NODEID = 10,
+ AID3_NODEID = 12,
+ XCD6_NODEID = 13,
+ XCD7_NODEID = 14,
+ NODEID_MAX,
+};
+
+extern const int node_id_to_phys_map[NODEID_MAX];
+
+void amdgpu_irq_disable_all(struct amdgpu_device *adev);
+
+int amdgpu_irq_init(struct amdgpu_device *adev);
+void amdgpu_irq_fini_sw(struct amdgpu_device *adev);
+void amdgpu_irq_fini_hw(struct amdgpu_device *adev);
+int amdgpu_irq_add_id(struct amdgpu_device *adev,
+ unsigned client_id, unsigned src_id,
+ struct amdgpu_irq_src *source);
+void amdgpu_irq_dispatch(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih);
+void amdgpu_irq_delegate(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry,
+ unsigned int num_dw);
+int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned type);
+int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned type);
+int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned type);
+bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
+ unsigned type);
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
+
+int amdgpu_irq_add_domain(struct amdgpu_device *adev);
+void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
+unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id);
+void amdgpu_restore_msix(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
new file mode 100644
index 000000000000..37270c4dab8d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
@@ -0,0 +1,345 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/mfd/core.h>
+
+#include "amdgpu.h"
+#include "amdgpu_isp.h"
+#include "isp_v4_1_0.h"
+#include "isp_v4_1_1.h"
+
+#define ISP_MC_ADDR_ALIGN (1024 * 32)
+
+/**
+ * isp_hw_init - start and test isp block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Returns:
+ * 0 on success or a negative error code otherwise.
+ */
+static int isp_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ if (isp->funcs->hw_init != NULL)
+ return isp->funcs->hw_init(isp);
+
+ return -ENODEV;
+}
+
+/**
+ * isp_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ *
+ * Returns:
+ * 0 on success or a negative error code otherwise.
+ */
+{
+ struct amdgpu_isp *isp = &ip_block->adev->isp;
+
+ if (isp->funcs->hw_fini != NULL)
+ return isp->funcs->hw_fini(isp);
+
+ return -ENODEV;
+}
+
+static int isp_load_fw_by_psp(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *hdr;
+ char ucode_prefix[10];
+ int r = 0;
+
+ /* get isp fw binary name and path */
+ amdgpu_ucode_ip_version_decode(adev, ISP_HWIP, ucode_prefix,
+ sizeof(ucode_prefix));
+
+ /* read isp fw */
+ r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r) {
+ amdgpu_ucode_release(&adev->isp.fw);
+ return r;
+ }
+
+ hdr = (const struct common_firmware_header *)adev->isp.fw->data;
+
+ adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].ucode_id =
+ AMDGPU_UCODE_ID_ISP;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_ISP].fw = adev->isp.fw;
+
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+
+ return r;
+}
+
+static int isp_early_init(struct amdgpu_ip_block *ip_block)
+{
+
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_isp *isp = &adev->isp;
+
+ switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ isp_v4_1_0_set_isp_funcs(isp);
+ break;
+ case IP_VERSION(4, 1, 1):
+ isp_v4_1_1_set_isp_funcs(isp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ isp->adev = adev;
+ isp->parent = adev->dev;
+
+ if (isp_load_fw_by_psp(adev)) {
+ DRM_DEBUG_DRIVER("%s: isp fw load failed\n", __func__);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static bool isp_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static int is_valid_isp_device(struct device *isp_parent, struct device *amdgpu_dev)
+{
+ if (isp_parent != amdgpu_dev)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * isp_user_buffer_alloc - create user buffer object (BO) for isp
+ *
+ * @dev: isp device handle
+ * @dmabuf: DMABUF handle for isp buffer allocated in system memory
+ * @buf_obj: GPU buffer object handle to initialize
+ * @buf_addr: GPU addr of the pinned BO to initialize
+ *
+ * Imports the isp DMABUF to allocate and pin a user BO for isp internal use.
+ * It performs a GART allocation to generate a GPU address for the BO, making
+ * it accessible to the ISP HW through the GART aperture.
+ *
+ * This function is exported to allow the V4L2 isp device, which is external
+ * to the drm device, to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int isp_user_buffer_alloc(struct device *dev, void *dmabuf,
+ void **buf_obj, u64 *buf_addr)
+{
+ struct platform_device *ispdev = to_platform_device(dev);
+ const struct isp_platform_data *isp_pdata;
+ struct amdgpu_device *adev;
+ struct mfd_cell *mfd_cell;
+ struct amdgpu_bo *bo;
+ u64 gpu_addr;
+ int ret;
+
+ if (WARN_ON(!ispdev))
+ return -ENODEV;
+
+ if (WARN_ON(!buf_obj))
+ return -EINVAL;
+
+ if (WARN_ON(!buf_addr))
+ return -EINVAL;
+
+ mfd_cell = &ispdev->mfd_cell[0];
+ if (!mfd_cell)
+ return -ENODEV;
+
+ isp_pdata = mfd_cell->platform_data;
+ adev = isp_pdata->adev;
+
+ ret = is_valid_isp_device(ispdev->dev.parent, adev->dev);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_bo_create_isp_user(adev, dmabuf,
+ AMDGPU_GEM_DOMAIN_GTT, &bo, &gpu_addr);
+ if (ret) {
+ drm_err(&adev->ddev, "failed to alloc gart user buffer (%d)", ret);
+ return ret;
+ }
+
+ *buf_obj = (void *)bo;
+ *buf_addr = gpu_addr;
+
+ return 0;
+}
+EXPORT_SYMBOL(isp_user_buffer_alloc);
+
+/**
+ * isp_user_buffer_free - free isp user buffer object (BO)
+ *
+ * @buf_obj: amdgpu isp user BO to free
+ *
+ * Unpins and unrefs a BO used internally by isp.
+ *
+ * This function is exported to allow the V4L2 isp device, which is external
+ * to the drm device, to free the isp user BO.
+ */
+void isp_user_buffer_free(void *buf_obj)
+{
+ amdgpu_bo_free_isp_user(buf_obj);
+}
+EXPORT_SYMBOL(isp_user_buffer_free);
+
+/**
+ * isp_kernel_buffer_alloc - create kernel buffer object (BO) for isp
+ *
+ * @dev: isp device handle
+ * @size: size for the new BO
+ * @buf_obj: GPU BO handle to initialize
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: CPU address mapping of BO
+ *
+ * Allocates and pins a kernel BO for internal isp firmware use.
+ *
+ * This function is exported to allow the V4L2 isp device, which is external
+ * to the drm device, to create and access the kernel BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int isp_kernel_buffer_alloc(struct device *dev, u64 size,
+ void **buf_obj, u64 *gpu_addr, void **cpu_addr)
+{
+ struct platform_device *ispdev = to_platform_device(dev);
+ struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj;
+ const struct isp_platform_data *isp_pdata;
+ struct amdgpu_device *adev;
+ struct mfd_cell *mfd_cell;
+ int ret;
+
+ if (WARN_ON(!ispdev))
+ return -ENODEV;
+
+ if (WARN_ON(!buf_obj))
+ return -EINVAL;
+
+ if (WARN_ON(!gpu_addr))
+ return -EINVAL;
+
+ if (WARN_ON(!cpu_addr))
+ return -EINVAL;
+
+ mfd_cell = &ispdev->mfd_cell[0];
+ if (!mfd_cell)
+ return -ENODEV;
+
+ isp_pdata = mfd_cell->platform_data;
+ adev = isp_pdata->adev;
+
+ ret = is_valid_isp_device(ispdev->dev.parent, adev->dev);
+ if (ret)
+ return ret;
+
+ /* Ensure *bo is NULL so a new BO will be created */
+ *bo = NULL;
+ ret = amdgpu_bo_create_kernel(adev,
+ size,
+ ISP_MC_ADDR_ALIGN,
+ AMDGPU_GEM_DOMAIN_GTT,
+ bo,
+ gpu_addr,
+ cpu_addr);
+ if (!cpu_addr || ret) {
+ drm_err(&adev->ddev, "failed to alloc gart kernel buffer (%d)", ret);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(isp_kernel_buffer_alloc);
+
+/**
+ * isp_kernel_buffer_free - free isp kernel buffer object (BO)
+ *
+ * @buf_obj: amdgpu isp kernel BO to free
+ * @gpu_addr: GPU addr of isp kernel BO
+ * @cpu_addr: CPU addr of isp kernel BO
+ *
+ * Unmaps and unpins an isp kernel BO.
+ *
+ * This function is exported to allow the V4L2 isp device, which is external
+ * to the drm device, to free the kernel BO.
+ */
+void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr)
+{
+ struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj;
+
+ amdgpu_bo_free_kernel(bo, gpu_addr, cpu_addr);
+}
+EXPORT_SYMBOL(isp_kernel_buffer_free);
+
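+/*
+ * Illustrative sketch (not part of the original change): the external V4L2
+ * isp driver is expected to pair the two kernel-buffer helpers above, e.g.
+ * for a firmware log buffer. fw_log_bo/fw_log_gpu/fw_log_cpu are hypothetical:
+ *
+ *     void *fw_log_bo;
+ *     u64 fw_log_gpu;
+ *     void *fw_log_cpu;
+ *
+ *     r = isp_kernel_buffer_alloc(isp_dev, SZ_64K, &fw_log_bo,
+ *                                 &fw_log_gpu, &fw_log_cpu);
+ *     if (!r) {
+ *             (use fw_log_cpu for CPU access, fw_log_gpu for the ISP HW)
+ *             isp_kernel_buffer_free(&fw_log_bo, &fw_log_gpu, &fw_log_cpu);
+ *     }
+ */
+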
+static const struct amd_ip_funcs isp_ip_funcs = {
+ .name = "isp_ip",
+ .early_init = isp_early_init,
+ .hw_init = isp_hw_init,
+ .hw_fini = isp_hw_fini,
+ .is_idle = isp_is_idle,
+ .set_clockgating_state = isp_set_clockgating_state,
+ .set_powergating_state = isp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version isp_v4_1_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_ISP,
+ .major = 4,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &isp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version isp_v4_1_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_ISP,
+ .major = 4,
+ .minor = 1,
+ .rev = 1,
+ .funcs = &isp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
new file mode 100644
index 000000000000..d6f4ffa4c97c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef __AMDGPU_ISP_H__
+#define __AMDGPU_ISP_H__
+
+#include <drm/amd/isp.h>
+#include <linux/pm_domain.h>
+
+#define ISP_REGS_OFFSET_END 0x629A4
+
+struct amdgpu_isp;
+
+struct isp_funcs {
+ int (*hw_init)(struct amdgpu_isp *isp);
+ int (*hw_fini)(struct amdgpu_isp *isp);
+};
+
+struct amdgpu_isp {
+ struct device *parent;
+ struct amdgpu_device *adev;
+ const struct isp_funcs *funcs;
+ struct mfd_cell *isp_cell;
+ struct resource *isp_res;
+ struct resource *isp_i2c_res;
+ struct resource *isp_gpio_res;
+ struct isp_platform_data *isp_pdata;
+ unsigned int harvest_config;
+ const struct firmware *fw;
+ struct generic_pm_domain ispgpd;
+};
+
+extern const struct amdgpu_ip_block_version isp_v4_1_0_ip_block;
+extern const struct amdgpu_ip_block_version isp_v4_1_1_ip_block;
+
+#endif /* __AMDGPU_ISP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
new file mode 100644
index 000000000000..0a0dcbf0798d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dev_coredump.h"
+#include "amdgpu_xgmi.h"
+
+static void amdgpu_job_do_core_dump(struct amdgpu_device *adev,
+ struct amdgpu_job *job)
+{
+ int i;
+
+ dev_info(adev->dev, "Dumping IP State\n");
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->funcs->dump_ip_state)
+ adev->ip_blocks[i].version->funcs
+ ->dump_ip_state((void *)&adev->ip_blocks[i]);
+ dev_info(adev->dev, "Dumping IP State Completed\n");
+
+ amdgpu_coredump(adev, true, false, job);
+}
+
+static void amdgpu_job_core_dump(struct amdgpu_device *adev,
+ struct amdgpu_job *job)
+{
+ struct list_head device_list, *device_list_handle = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
+ struct amdgpu_hive_info *hive = NULL;
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+ /*
+ * Reuse the logic in amdgpu_device_gpu_recover() to build the list of
+ * devices for the core dump.
+ */
+ INIT_LIST_HEAD(&device_list);
+ if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ if (!list_is_first(&adev->reset_list, &device_list))
+ list_rotate_to_front(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ } else {
+ list_add_tail(&adev->reset_list, &device_list);
+ device_list_handle = &device_list;
+ }
+
+ /* Do the coredump for each device */
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list)
+ amdgpu_job_do_core_dump(tmp_adev, job);
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
+}
+
+static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
+ struct drm_wedge_task_info *info = NULL;
+ struct amdgpu_task_info *ti = NULL;
+ struct amdgpu_device *adev = ring->adev;
+ int idx, r;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
+ dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s",
+ __func__, s_job->sched->name);
+
+ /* Effectively the job is aborted as the device is gone */
+ return DRM_GPU_SCHED_STAT_ENODEV;
+ }
+
+ /*
+ * Do the coredump immediately after a job timeout to get a very
+ * close dump/snapshot/representation of the GPU's current error status.
+ * Skip it for SRIOV, since VF FLR will be triggered by the host driver
+ * before the job timeout.
+ */
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_job_core_dump(adev, job);
+
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) &&
+ amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
+ dev_err(adev->dev, "ring %s timeout, but soft recovered\n",
+ s_job->sched->name);
+ goto exit;
+ }
+
+ dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n",
+ job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
+ ring->fence_drv.sync_seq);
+
+ ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
+ if (ti) {
+ amdgpu_vm_print_task_info(adev, ti);
+ info = &ti->task;
+ }
+
+ /* attempt a per ring reset */
+ if (amdgpu_gpu_recovery &&
+ amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
+ ring->funcs->reset) {
+ dev_err(adev->dev, "Starting %s ring reset\n",
+ s_job->sched->name);
+ r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
+ if (!r) {
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ dev_err(adev->dev, "Ring %s reset succeeded\n",
+ ring->sched.name);
+ drm_dev_wedged_event(adev_to_drm(adev),
+ DRM_WEDGE_RECOVERY_NONE, info);
+ goto exit;
+ }
+ dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
+ }
+
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+
+ if (amdgpu_device_should_recover_gpu(ring->adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_JOB;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ /*
+ * Skip an unnecessary extra coredump, as we have already captured
+ * a close representation of the GPU's error status above.
+ */
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
+ if (r)
+ dev_err(adev->dev, "GPU Recovery Failed: %d\n", r);
+ } else {
+ drm_sched_suspend_timeout(&ring->sched);
+ if (amdgpu_sriov_vf(adev))
+ adev->virt.tdr_debug = true;
+ }
+
+exit:
+ amdgpu_vm_put_task_info(ti);
+ drm_dev_exit(idx);
+ return DRM_GPU_SCHED_STAT_RESET;
+}
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct drm_sched_entity *entity, void *owner,
+ unsigned int num_ibs, struct amdgpu_job **job,
+ u64 drm_client_id)
+{
+ struct amdgpu_fence *af;
+ int r;
+
+ if (num_ibs == 0)
+ return -EINVAL;
+
+ *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
+ if (!*job)
+ return -ENOMEM;
+
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_job;
+ }
+ (*job)->hw_fence = af;
+
+ af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+ if (!af) {
+ r = -ENOMEM;
+ goto err_fence;
+ }
+ (*job)->hw_vm_fence = af;
+
+ (*job)->vm = vm;
+
+ amdgpu_sync_create(&(*job)->explicit_sync);
+ (*job)->generation = amdgpu_vm_generation(adev, vm);
+ (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
+
+ if (!entity)
+ return 0;
+
+ return drm_sched_job_init(&(*job)->base, entity, 1, owner,
+ drm_client_id);
+
+err_fence:
+ kfree((*job)->hw_fence);
+err_job:
+ kfree(*job);
+ *job = NULL;
+
+ return r;
+}
+
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity, void *owner,
+ size_t size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job, u64 k_job_id)
+{
+ int r;
+
+ r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job,
+ k_job_id);
+ if (r)
+ return r;
+
+ (*job)->num_ibs = 1;
+ r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
+ if (r) {
+ if (entity)
+ drm_sched_job_cleanup(&(*job)->base);
+ kfree((*job)->hw_vm_fence);
+ kfree((*job)->hw_fence);
+ kfree(*job);
+ *job = NULL;
+ }
+
+ return r;
+}
+
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa)
+{
+ if (gds) {
+ job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+ job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
+ }
+ if (gws) {
+ job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+ job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
+ }
+ if (oa) {
+ job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+ job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
+ }
+}
+
+void amdgpu_job_free_resources(struct amdgpu_job *job)
+{
+ struct dma_fence *f;
+ unsigned i;
+
+ /* Check if any fences were initialized */
+ if (job->base.s_fence && job->base.s_fence->finished.ops)
+ f = &job->base.s_fence->finished;
+ else if (job->hw_fence && job->hw_fence->base.ops)
+ f = &job->hw_fence->base;
+ else
+ f = NULL;
+
+ for (i = 0; i < job->num_ibs; ++i)
+ amdgpu_ib_free(&job->ibs[i], f);
+}
+
+static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
+{
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
+
+ drm_sched_job_cleanup(s_job);
+
+ amdgpu_sync_free(&job->explicit_sync);
+
+ if (job->hw_fence->base.ops)
+ dma_fence_put(&job->hw_fence->base);
+ else
+ kfree(job->hw_fence);
+ if (job->hw_vm_fence->base.ops)
+ dma_fence_put(&job->hw_vm_fence->base);
+ else
+ kfree(job->hw_vm_fence);
+
+ kfree(job);
+}
+
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader)
+{
+ struct dma_fence *fence = &leader->base.s_fence->scheduled;
+
+ WARN_ON(job->gang_submit);
+
+ /*
+ * Don't add a reference when we are the gang leader to avoid circle
+ * dependency.
+ */
+ if (job != leader)
+ dma_fence_get(fence);
+ job->gang_submit = fence;
+}
+
+void amdgpu_job_free(struct amdgpu_job *job)
+{
+ if (job->base.entity)
+ drm_sched_job_cleanup(&job->base);
+
+ amdgpu_job_free_resources(job);
+ amdgpu_sync_free(&job->explicit_sync);
+ if (job->gang_submit != &job->base.s_fence->scheduled)
+ dma_fence_put(job->gang_submit);
+
+ if (job->hw_fence->base.ops)
+ dma_fence_put(&job->hw_fence->base);
+ else
+ kfree(job->hw_fence);
+ if (job->hw_vm_fence->base.ops)
+ dma_fence_put(&job->hw_vm_fence->base);
+ else
+ kfree(job->hw_vm_fence);
+
+ kfree(job);
+}
+
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
+{
+ struct dma_fence *f;
+
+ drm_sched_job_arm(&job->base);
+ f = dma_fence_get(&job->base.s_fence->finished);
+ amdgpu_job_free_resources(job);
+ drm_sched_entity_push_job(&job->base);
+
+ return f;
+}
+
+int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct dma_fence **fence)
+{
+ int r;
+
+ job->base.sched = &ring->sched;
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
+
+ if (r)
+ return r;
+
+ amdgpu_job_free(job);
+ return 0;
+}
+
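+/*
+ * Illustrative sketch (not part of the original change): kernel-internal users
+ * usually allocate a job together with a single IB, fill the IB and then
+ * either push it through an entity with amdgpu_job_submit() or bypass the
+ * scheduler with amdgpu_job_submit_direct(), as amdgpu_jpeg_dec_set_reg() in
+ * amdgpu_jpeg.c does. ib_size_dw and k_job_id stand in for the caller's
+ * values:
+ *
+ *     r = amdgpu_job_alloc_with_ib(adev, NULL, NULL, ib_size_dw * 4,
+ *                                  AMDGPU_IB_POOL_DIRECT, &job, k_job_id);
+ *     if (r)
+ *             return r;
+ *     (fill job->ibs[0].ptr[] and set job->ibs[0].length_dw)
+ *     r = amdgpu_job_submit_direct(job, ring, &fence);
+ *     if (r)
+ *             amdgpu_job_free(job);
+ */
+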
+static struct dma_fence *
+amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
+ struct drm_sched_entity *s_entity)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
+ struct amdgpu_job *job = to_amdgpu_job(sched_job);
+ struct dma_fence *fence;
+ int r;
+
+ r = drm_sched_entity_error(s_entity);
+ if (r)
+ goto error;
+
+ if (job->gang_submit) {
+ fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+ if (fence)
+ return fence;
+ }
+
+ fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
+ if (fence)
+ return fence;
+
+ if (job->vm && !job->vmid) {
+ r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
+ if (r) {
+ dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
+ goto error;
+ }
+ return fence;
+ }
+
+ return NULL;
+
+error:
+ dma_fence_set_error(&job->base.s_fence->finished, r);
+ return NULL;
+}
+
+static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *fence = NULL, *finished;
+ struct amdgpu_job *job;
+ int r = 0;
+
+ job = to_amdgpu_job(sched_job);
+ finished = &job->base.s_fence->finished;
+
+ trace_amdgpu_sched_run_job(job);
+
+ /* Skip job if VRAM is lost and never resubmit gangs */
+ if (job->generation != amdgpu_vm_generation(adev, job->vm) ||
+ (job->job_run_counter && job->gang_submit))
+ dma_fence_set_error(finished, -ECANCELED);
+
+ if (finished->error < 0) {
+ dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+ ring->name);
+ } else {
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
+ &fence);
+ if (r)
+ dev_err(adev->dev,
+ "Error scheduling IBs (%d) in ring(%s)", r,
+ ring->name);
+ }
+
+ job->job_run_counter++;
+ amdgpu_job_free_resources(job);
+
+ fence = r ? ERR_PTR(r) : fence;
+ return fence;
+}
+
+/*
+ * This is a duplicate function from DRM scheduler sched_internal.h.
+ * The plan is to remove it once amdgpu_job_stop_all_jobs_on_sched is removed,
+ * as the latter is incorrect and racy.
+ *
+ * See https://lore.kernel.org/amd-gfx/44edde63-7181-44fb-a4f7-94e50514f539@amd.com/
+ */
+static struct drm_sched_job *
+drm_sched_entity_queue_pop(struct drm_sched_entity *entity)
+{
+ struct spsc_node *node;
+
+ node = spsc_queue_pop(&entity->job_queue);
+ if (!node)
+ return NULL;
+
+ return container_of(node, struct drm_sched_job, queue_node);
+}
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
+{
+ struct drm_sched_job *s_job;
+ struct drm_sched_entity *s_entity = NULL;
+ int i;
+
+ /* Signal all jobs not yet scheduled */
+ for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
+ struct drm_sched_rq *rq = sched->sched_rq[i];
+ spin_lock(&rq->lock);
+ list_for_each_entry(s_entity, &rq->entities, list) {
+ while ((s_job = drm_sched_entity_queue_pop(s_entity))) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+
+ dma_fence_signal(&s_fence->scheduled);
+ dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+ dma_fence_signal(&s_fence->finished);
+ }
+ }
+ spin_unlock(&rq->lock);
+ }
+
+ /* Signal all jobs already scheduled to HW */
+ list_for_each_entry(s_job, &sched->pending_list, list) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+
+ dma_fence_set_error(&s_fence->finished, -EHWPOISON);
+ dma_fence_signal(&s_fence->finished);
+ }
+}
+
+const struct drm_sched_backend_ops amdgpu_sched_ops = {
+ .prepare_job = amdgpu_job_prepare_job,
+ .run_job = amdgpu_job_run,
+ .timedout_job = amdgpu_job_timedout,
+ .free_job = amdgpu_job_free_cb
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
new file mode 100644
index 000000000000..7abf069d17d4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_JOB_H__
+#define __AMDGPU_JOB_H__
+
+#include <drm/gpu_scheduler.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+
+/* bit set means command submit involves a preamble IB */
+#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0)
+/* bit set means preamble IB is first presented in belonging context */
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1)
+/* bit set means context switch occurred */
+#define AMDGPU_HAVE_CTX_SWITCH (1 << 2)
+/* bit set means IB is preempted */
+#define AMDGPU_IB_PREEMPTED (1 << 3)
+
+#define to_amdgpu_job(sched_job) \
+ container_of((sched_job), struct amdgpu_job, base)
+
+#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
+
+struct amdgpu_fence;
+enum amdgpu_ib_pool_type;
+
+/* Internal kernel job ids. (decreasing values, starting from U64_MAX). */
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE (18446744073709551615ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES (18446744073709551614ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE (18446744073709551613ULL)
+#define AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR (18446744073709551612ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER (18446744073709551611ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA (18446744073709551610ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER (18446744073709551609ULL)
+#define AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE (18446744073709551608ULL)
+#define AMDGPU_KERNEL_JOB_ID_MOVE_BLIT (18446744073709551607ULL)
+#define AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER (18446744073709551606ULL)
+#define AMDGPU_KERNEL_JOB_ID_CLEANER_SHADER (18446744073709551605ULL)
+#define AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB (18446744073709551604ULL)
+#define AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP (18446744073709551603ULL)
+#define AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST (18446744073709551602ULL)
+
+struct amdgpu_job {
+ struct drm_sched_job base;
+ struct amdgpu_vm *vm;
+ struct amdgpu_sync explicit_sync;
+ struct amdgpu_fence *hw_fence;
+ struct amdgpu_fence *hw_vm_fence;
+ struct dma_fence *gang_submit;
+ uint32_t preamble_status;
+ uint32_t preemption_status;
+ bool vm_needs_flush;
+ bool gds_switch_needed;
+ bool spm_update_needed;
+ uint64_t vm_pd_addr;
+ unsigned vmid;
+ unsigned pasid;
+ uint32_t gds_base, gds_size;
+ uint32_t gws_base, gws_size;
+ uint32_t oa_base, oa_size;
+ uint64_t generation;
+
+ /* user fence handling */
+ uint64_t uf_addr;
+ uint64_t uf_sequence;
+
+ /* virtual addresses for shadow/GDS/CSA */
+ uint64_t shadow_va;
+ uint64_t csa_va;
+ uint64_t gds_va;
+ bool init_shadow;
+
+ /* job_run_counter >= 1 means a resubmit job */
+ uint32_t job_run_counter;
+
+ /* enforce isolation */
+ bool enforce_isolation;
+ bool run_cleaner_shader;
+
+ uint32_t num_ibs;
+ struct amdgpu_ib ibs[];
+};
+
+static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)
+{
+ return to_amdgpu_ring(job->base.entity->rq->sched);
+}
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct drm_sched_entity *entity, void *owner,
+ unsigned int num_ibs, struct amdgpu_job **job,
+ u64 drm_client_id);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+ struct drm_sched_entity *entity, void *owner,
+ size_t size, enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job,
+ u64 k_job_id);
+void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
+ struct amdgpu_bo *gws, struct amdgpu_bo *oa);
+void amdgpu_job_free_resources(struct amdgpu_job *job);
+void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
+ struct amdgpu_job *leader);
+void amdgpu_job_free(struct amdgpu_job *job);
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job);
+int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct dma_fence **fence);
+
+void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
new file mode 100644
index 000000000000..63ee6ba6a931
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev);
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler);
+ mutex_init(&adev->jpeg.jpeg_pg_lock);
+ atomic_set(&adev->jpeg.total_submission_cnt, 0);
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG))
+ adev->jpeg.indirect_sram = true;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1U << i))
+ continue;
+
+ if (adev->jpeg.indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->jpeg.inst[i].dpg_sram_bo,
+ &adev->jpeg.inst[i].dpg_sram_gpu_addr,
+ &adev->jpeg.inst[i].dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev,
+ "JPEG %d (%d) failed to allocate DPG bo\n", i, r);
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1U << i))
+ continue;
+
+ amdgpu_bo_free_kernel(
+ &adev->jpeg.inst[i].dpg_sram_bo,
+ &adev->jpeg.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->jpeg.inst[i].dpg_sram_cpu_addr);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
+ }
+
+ if (adev->jpeg.reg_list)
+ amdgpu_jpeg_reg_dump_fini(adev);
+
+ mutex_destroy(&adev->jpeg.jpeg_pg_lock);
+
+ return 0;
+}
+
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev)
+{
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ return 0;
+}
+
+int amdgpu_jpeg_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, jpeg.idle_work.work);
+ unsigned int fences = 0;
+ unsigned int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1U << i))
+ continue;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
+ fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]);
+ }
+
+ if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) {
+ mutex_lock(&adev->jpeg.jpeg_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_PG_STATE_GATE);
+ mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+ } else {
+ schedule_delayed_work(&adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+ }
+}
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ atomic_inc(&adev->jpeg.total_submission_cnt);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ mutex_lock(&adev->jpeg.jpeg_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_JPEG,
+ AMD_PG_STATE_UNGATE);
+ mutex_unlock(&adev->jpeg.jpeg_pg_lock);
+}
+
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring)
+{
+ atomic_dec(&ring->adev->jpeg.total_submission_cnt);
+ schedule_delayed_work(&ring->adev->jpeg.idle_work, JPEG_IDLE_TIMEOUT);
+}
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ /* JPEG in SRIOV does not support direct register read/write */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+
+ WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD);
+ /* Read the register back to make sure the write has been executed. */
+ RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+
+ amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0));
+ amdgpu_ring_write(ring, 0xABADCAFE);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xABADCAFE)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ const unsigned ib_size_dw = 16;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+
+ ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0);
+ ib->ptr[1] = 0xDEADBEEF;
+ for (i = 2; i < 16; i += 2) {
+ ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
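+/*
+ * IB test: submit a small IB that writes 0xDEADBEEF to the JPEG pitch
+ * register, wait for its fence and (outside SRIOV) verify the register
+ * readback.
+ */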
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ struct dma_fence *fence = NULL;
+ long r = 0;
+
+ r = amdgpu_jpeg_dec_set_reg(ring, 1, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto error;
+ } else if (r < 0) {
+ goto error;
+ } else {
+ r = 0;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ if (amdgpu_emu_mode == 1)
+ udelay(10);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ }
+
+ dma_fence_put(fence);
+error:
+ return r;
+}
+
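+/*
+ * Forward a JPEG poison interrupt to the RAS core for dispatch; a no-op
+ * when no RAS interface is registered for the block.
+ */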
+int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->jpeg.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+
+ return 0;
+}
+
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (adev->jpeg.harvest_config & (1 << i) ||
+ !adev->jpeg.inst[i].ras_poison_irq.funcs)
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_jpeg_ras *ras;
+
+ if (!adev->jpeg.ras)
+ return 0;
+
+ ras = adev->jpeg.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register jpeg ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "jpeg");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->jpeg.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init;
+
+ return 0;
+}
+
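+/*
+ * Hand the accumulated DPG SRAM (offset, value) write list of an instance
+ * to the PSP for programming; the size is derived from how far the current
+ * write pointer has advanced past the start of the buffer.
+ */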
+int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = AMDGPU_UCODE_ID_JPEG_RAM,
+ .mc_addr = adev->jpeg.inst[inst_idx].dpg_sram_gpu_addr,
+ .ucode_size = ((uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr),
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+/*
+ * debugfs interface to enable/disable JPEG job submission to specific cores.
+ */
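+/*
+ * Example usage (a sketch; the path assumes the usual debugfs layout and
+ * DRM minor 0, and that instance 0 exposes at least two rings):
+ *   echo 0x3 > /sys/kernel/debug/dri/0/amdgpu_jpeg_sched_mask
+ * keeps only rings 0 and 1 of instance 0 schedulable; reading the file
+ * returns the mask of currently enabled rings.
+ */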
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i, j;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << (adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings)) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (val & (BIT_ULL((i * adev->jpeg.num_jpeg_rings) + j)))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ }
+ /* publish the sched.ready flag updates so they are visible across SMP immediately */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_jpeg_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i, j;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->sched.ready)
+ mask |= 1ULL << ((i * adev->jpeg.num_jpeg_rings) + j);
+ }
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_jpeg_sched_mask_fops,
+ amdgpu_debugfs_jpeg_sched_mask_get,
+ amdgpu_debugfs_jpeg_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (adev->jpeg.num_jpeg_inst <= 1 && adev->jpeg.num_jpeg_rings <= 1)
+ return;
+ sprintf(name, "amdgpu_jpeg_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_jpeg_sched_mask_fops);
+#endif
+}
+
+static ssize_t amdgpu_get_jpeg_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->jpeg.supported_reset);
+}
+
+static DEVICE_ATTR(jpeg_reset_mask, 0444,
+ amdgpu_get_jpeg_reset_mask, NULL);
+
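+/*
+ * Expose the supported JPEG reset types through a read-only sysfs attribute
+ * on the device. As a sketch (the exact path depends on the system), it is
+ * typically readable via:
+ *   cat /sys/class/drm/card0/device/jpeg_reset_mask
+ */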
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->jpeg.num_jpeg_inst) {
+ r = device_create_file(adev->dev, &dev_attr_jpeg_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->jpeg.num_jpeg_inst)
+ device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
+ }
+}
+
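+/*
+ * Allocate a snapshot buffer of <count> registers per JPEG instance and
+ * remember the register list; amdgpu_jpeg_dump_ip_state() fills it and
+ * amdgpu_jpeg_print_ip_state() formats it for the IP state dump.
+ */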
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+ adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count,
+ sizeof(uint32_t), GFP_KERNEL);
+ if (!adev->jpeg.ip_dump) {
+ dev_err(adev->dev,
+ "Failed to allocate memory for JPEG IP Dump\n");
+ return -ENOMEM;
+ }
+ adev->jpeg.reg_list = reg;
+ adev->jpeg.reg_count = count;
+
+ return 0;
+}
+
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->jpeg.ip_dump);
+ adev->jpeg.reg_list = NULL;
+ adev->jpeg.reg_count = 0;
+}
+
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst_off, inst_id, is_powered;
+ int i, j;
+
+ if (!adev->jpeg.ip_dump)
+ return;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ inst_id = GET_INST(JPEG, i);
+ inst_off = i * adev->jpeg.reg_count;
+ /* check power status from UVD_JPEG_POWER_STATUS */
+ adev->jpeg.ip_dump[inst_off] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[0],
+ inst_id));
+ is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+ if (is_powered)
+ for (j = 1; j < adev->jpeg.reg_count; j++)
+ adev->jpeg.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[j],
+ inst_id));
+ }
+}
+
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst_off, is_powered;
+ int i, j;
+
+ if (!adev->jpeg.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:JPEG%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * adev->jpeg.reg_count;
+ is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+ if (is_powered) {
+ drm_printf(p, "Active Instance:JPEG%d\n", i);
+ for (j = 0; j < adev->jpeg.reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", adev->jpeg.reg_list[j].reg_name,
+ adev->jpeg.ip_dump[inst_off + j]);
+ } else
+ drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
+ }
+}
+
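+/*
+ * A register written from a user IB must fall inside the JPEG register
+ * window and outside the atomic sub-range.
+ */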
+static inline bool amdgpu_jpeg_reg_valid(u32 reg)
+{
+ if (reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END ||
+ (reg >= JPEG_ATOMIC_RANGE_START && reg <= JPEG_ATOMIC_RANGE_END))
+ return false;
+ else
+ return true;
+}
+
+/**
+ * amdgpu_jpeg_dec_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream; return -EINVAL for an invalid packet,
+ * 0 otherwise.
+ */
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ u32 i, reg, res, cond, type;
+ struct amdgpu_device *adev = parser->adev;
+
+ for (i = 0; i < ib->length_dw; i += 2) {
+ reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+ res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+ cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+ type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+ if (res) /* only support 0 at the moment */
+ return -EINVAL;
+
+ switch (type) {
+ case PACKETJ_TYPE0:
+ if (cond != PACKETJ_CONDITION_CHECK0 ||
+ !amdgpu_jpeg_reg_valid(reg)) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE3:
+ if (cond != PACKETJ_CONDITION_CHECK3 ||
+ !amdgpu_jpeg_reg_valid(reg)) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE6:
+ if (ib->ptr[i] == CP_PACKETJ_NOP)
+ continue;
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ default:
+ dev_err(adev->dev, "Unknown packet type %d !\n", type);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
new file mode 100644
index 000000000000..346ae0ab09d3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_JPEG_H__
+#define __AMDGPU_JPEG_H__
+
+#include "amdgpu_ras.h"
+#include "amdgpu_cs.h"
+
+#define AMDGPU_MAX_JPEG_INSTANCES 4
+#define AMDGPU_MAX_JPEG_RINGS 10
+#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8
+
+#define JPEG_REG_RANGE_START 0x4000
+#define JPEG_REG_RANGE_END 0x41c2
+#define JPEG_ATOMIC_RANGE_START 0x4120
+#define JPEG_ATOMIC_RANGE_END 0x412A
+
+#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
+#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
+
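+/*
+ * DPG register programming: in direct mode the (offset, value) pair is
+ * written through the UVD_DPG_LMA_CTL/DATA window; in indirect mode it is
+ * appended to the per-instance DPG SRAM buffer and later loaded by the PSP
+ * (see amdgpu_jpeg_psp_update_sram()).
+ */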
+#define WREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ JPEG, GET_INST(JPEG, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \
+ indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } else { \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define RREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, mask_en) \
+ ({ \
+ WREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \
+ do { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \
+ WREG32_SOC15( \
+ JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \
+ indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
+#define RREG32_SOC24_JPEG_DPG_MODE(inst_idx, offset, mask_en) \
+ do { \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_MASK, 0xFFFFFFFF); \
+ WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(JPEG, inst_idx, regUVD_DPG_LMA_DATA); \
+ } while (0)
+
+#define ADD_SOC24_JPEG_TO_DPG_SRAM(inst_idx, offset, value, indirect) \
+ do { \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = offset; \
+ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value; \
+ } while (0)
+
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_jpeg_caps {
+ AMDGPU_JPEG_RRMT_ENABLED,
+};
+
+#define AMDGPU_JPEG_CAPS(caps) BIT(AMDGPU_JPEG_##caps)
+
+struct amdgpu_jpeg_reg {
+ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
+};
+
+struct amdgpu_jpeg_inst {
+ struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS];
+ struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
+ struct amdgpu_jpeg_reg external;
+ struct amdgpu_bo *dpg_sram_bo;
+ struct dpg_pause_state pause_state;
+ void *dpg_sram_cpu_addr;
+ uint64_t dpg_sram_gpu_addr;
+ uint32_t *dpg_sram_curr_addr;
+ uint8_t aid_id;
+};
+
+struct amdgpu_jpeg_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_jpeg {
+ uint8_t num_jpeg_inst;
+ struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
+ unsigned num_jpeg_rings;
+ struct amdgpu_jpeg_reg internal;
+ unsigned harvest_config;
+ struct delayed_work idle_work;
+ enum amd_powergating_state cur_state;
+ struct mutex jpeg_pg_lock;
+ atomic_t total_submission_cnt;
+ struct ras_common_if *ras_if;
+ struct amdgpu_jpeg_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
+ bool indirect_sram;
+ uint32_t supported_reset;
+ uint32_t caps;
+ u32 *ip_dump;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
+};
+
+int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_suspend(struct amdgpu_device *adev);
+int amdgpu_jpeg_resume(struct amdgpu_device *adev);
+
+void amdgpu_jpeg_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_jpeg_ring_end_use(struct amdgpu_ring *ring);
+
+int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id);
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+int amdgpu_jpeg_dec_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+
+#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
new file mode 100644
index 000000000000..6ee77f431d56
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -0,0 +1,1944 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include "amdgpu.h"
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "atom.h"
+
+#include <linux/vga_switcheroo.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_display.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amd_pcie.h"
+#include "amdgpu_userq.h"
+
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+{
+ struct amdgpu_gpu_instance *gpu_instance;
+ int i;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev == adev) {
+ mgpu_info.gpu_ins[i] =
+ mgpu_info.gpu_ins[mgpu_info.num_gpu - 1];
+ mgpu_info.num_gpu--;
+ if (adev->flags & AMD_IS_APU)
+ mgpu_info.num_apu--;
+ else
+ mgpu_info.num_dgpu--;
+ break;
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+}
+
+/**
+ * amdgpu_driver_unload_kms - Main unload function for KMS.
+ *
+ * @dev: drm dev pointer
+ *
+ * This is the main unload function for KMS (all asics).
+ */
+void amdgpu_driver_unload_kms(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (adev == NULL)
+ return;
+
+ amdgpu_unregister_gpu_instance(adev);
+
+ if (adev->rmmio == NULL)
+ return;
+
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD))
+ DRM_WARN("smart shift update failed\n");
+
+ amdgpu_acpi_fini(adev);
+ amdgpu_device_fini_hw(adev);
+}
+
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
+{
+ struct amdgpu_gpu_instance *gpu_instance;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) {
+ DRM_ERROR("Cannot register more gpu instance\n");
+ mutex_unlock(&mgpu_info.mutex);
+ return;
+ }
+
+ gpu_instance = &(mgpu_info.gpu_ins[mgpu_info.num_gpu]);
+ gpu_instance->adev = adev;
+ gpu_instance->mgpu_fan_enabled = 0;
+
+ mgpu_info.num_gpu++;
+ if (adev->flags & AMD_IS_APU)
+ mgpu_info.num_apu++;
+ else
+ mgpu_info.num_dgpu++;
+
+ mutex_unlock(&mgpu_info.mutex);
+}
+
+/**
+ * amdgpu_driver_load_kms - Main load function for KMS.
+ *
+ * @adev: pointer to struct amdgpu_device
+ * @flags: device flags
+ *
+ * This is the main load function for KMS (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
+{
+ struct drm_device *dev;
+ int r, acpi_status;
+
+ dev = adev_to_drm(adev);
+
+ /* amdgpu_device_init() should report only fatal errors
+ * (memory allocation, iomapping or memory manager
+ * initialization failures); it must still properly
+ * initialize the GPU MC controller and permit
+ * VRAM allocation.
+ */
+ r = amdgpu_device_init(adev, flags);
+ if (r) {
+ dev_err(dev->dev, "Fatal error during GPU init\n");
+ goto out;
+ }
+
+ amdgpu_device_detect_runtime_pm_mode(adev);
+
+ /* Call ACPI methods: these require modeset init,
+ * but a failure here is not fatal.
+ */
+
+ acpi_status = amdgpu_acpi_init(adev);
+ if (acpi_status)
+ dev_dbg(dev->dev, "Error during ACPI methods call\n");
+
+ if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD))
+ DRM_WARN("smart shift update failed\n");
+
+out:
+ if (r)
+ amdgpu_driver_unload_kms(dev);
+
+ return r;
+}
+
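+/*
+ * Map a UAPI hardware IP type (AMDGPU_HW_IP_*) to the corresponding internal
+ * IP block type; VCN_JPEG maps to the JPEG block when one is present and
+ * falls back to VCN otherwise.
+ */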
+static enum amd_ip_block_type
+ amdgpu_ip_get_block_type(struct amdgpu_device *adev, uint32_t ip)
+{
+ enum amd_ip_block_type type;
+
+ switch (ip) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+ break;
+ default:
+ type = AMD_IP_BLOCK_TYPE_NUM;
+ break;
+ }
+
+ return type;
+}
+
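+/*
+ * Fill in the firmware version and feature version for the firmware type
+ * (and, where applicable, index) requested through the query_fw ioctl data.
+ */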
+static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
+ struct drm_amdgpu_query_fw *query_fw,
+ struct amdgpu_device *adev)
+{
+ switch (query_fw->fw_type) {
+ case AMDGPU_INFO_FW_VCE:
+ fw_info->ver = adev->vce.fw_version;
+ fw_info->feature = adev->vce.fb_version;
+ break;
+ case AMDGPU_INFO_FW_UVD:
+ fw_info->ver = adev->uvd.fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_VCN:
+ fw_info->ver = adev->vcn.fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_GMC:
+ fw_info->ver = adev->gmc.fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_GFX_ME:
+ fw_info->ver = adev->gfx.me_fw_version;
+ fw_info->feature = adev->gfx.me_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_PFP:
+ fw_info->ver = adev->gfx.pfp_fw_version;
+ fw_info->feature = adev->gfx.pfp_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_CE:
+ fw_info->ver = adev->gfx.ce_fw_version;
+ fw_info->feature = adev->gfx.ce_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC:
+ fw_info->ver = adev->gfx.rlc_fw_version;
+ fw_info->feature = adev->gfx.rlc_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL:
+ fw_info->ver = adev->gfx.rlc_srlc_fw_version;
+ fw_info->feature = adev->gfx.rlc_srlc_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM:
+ fw_info->ver = adev->gfx.rlc_srlg_fw_version;
+ fw_info->feature = adev->gfx.rlc_srlg_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM:
+ fw_info->ver = adev->gfx.rlc_srls_fw_version;
+ fw_info->feature = adev->gfx.rlc_srls_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLCP:
+ fw_info->ver = adev->gfx.rlcp_ucode_version;
+ fw_info->feature = adev->gfx.rlcp_ucode_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_RLCV:
+ fw_info->ver = adev->gfx.rlcv_ucode_version;
+ fw_info->feature = adev->gfx.rlcv_ucode_feature_version;
+ break;
+ case AMDGPU_INFO_FW_GFX_MEC:
+ if (query_fw->index == 0) {
+ fw_info->ver = adev->gfx.mec_fw_version;
+ fw_info->feature = adev->gfx.mec_feature_version;
+ } else if (query_fw->index == 1) {
+ fw_info->ver = adev->gfx.mec2_fw_version;
+ fw_info->feature = adev->gfx.mec2_feature_version;
+ } else
+ return -EINVAL;
+ break;
+ case AMDGPU_INFO_FW_SMC:
+ fw_info->ver = adev->pm.fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_TA:
+ switch (query_fw->index) {
+ case TA_FW_TYPE_PSP_XGMI:
+ fw_info->ver = adev->psp.xgmi_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.xgmi_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_RAS:
+ fw_info->ver = adev->psp.ras_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.ras_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_HDCP:
+ fw_info->ver = adev->psp.hdcp_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.hdcp_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_DTM:
+ fw_info->ver = adev->psp.dtm_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.dtm_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_RAP:
+ fw_info->ver = adev->psp.rap_context.context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.rap_context.context
+ .bin_desc.feature_version;
+ break;
+ case TA_FW_TYPE_PSP_SECUREDISPLAY:
+ fw_info->ver = adev->psp.securedisplay_context.context.bin_desc.fw_version;
+ fw_info->feature =
+ adev->psp.securedisplay_context.context.bin_desc
+ .feature_version;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_FW_SDMA:
+ if (query_fw->index >= adev->sdma.num_instances)
+ return -EINVAL;
+ fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
+ fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
+ break;
+ case AMDGPU_INFO_FW_SOS:
+ fw_info->ver = adev->psp.sos.fw_version;
+ fw_info->feature = adev->psp.sos.feature_version;
+ break;
+ case AMDGPU_INFO_FW_ASD:
+ fw_info->ver = adev->psp.asd_context.bin_desc.fw_version;
+ fw_info->feature = adev->psp.asd_context.bin_desc.feature_version;
+ break;
+ case AMDGPU_INFO_FW_DMCU:
+ fw_info->ver = adev->dm.dmcu_fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_DMCUB:
+ fw_info->ver = adev->dm.dmcub_fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_TOC:
+ fw_info->ver = adev->psp.toc.fw_version;
+ fw_info->feature = adev->psp.toc.feature_version;
+ break;
+ case AMDGPU_INFO_FW_CAP:
+ fw_info->ver = adev->psp.cap_fw_version;
+ fw_info->feature = adev->psp.cap_feature_version;
+ break;
+ case AMDGPU_INFO_FW_MES_KIQ:
+ fw_info->ver = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ fw_info->feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
+ >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+ break;
+ case AMDGPU_INFO_FW_MES:
+ fw_info->ver = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ fw_info->feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
+ >> AMDGPU_MES_FEAT_VERSION_SHIFT;
+ break;
+ case AMDGPU_INFO_FW_IMU:
+ fw_info->ver = adev->gfx.imu_fw_version;
+ fw_info->feature = 0;
+ break;
+ case AMDGPU_INFO_FW_VPE:
+ fw_info->ver = adev->vpe.fw_version;
+ fw_info->feature = adev->vpe.feature_version;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
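+/*
+ * Report the shadow and CSA buffer sizes/alignments needed for GFX user
+ * queues, if the GFX IP implementation provides them.
+ */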
+static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_gfx *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow = {};
+
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
+ meta->shadow_size = shadow.shadow_size;
+ meta->shadow_alignment = shadow.shadow_alignment;
+ meta->csa_size = shadow.csa_size;
+ meta->csa_alignment = shadow.csa_alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
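+/*
+ * Answer AMDGPU_INFO_HW_IP_INFO: count the rings of the requested IP type
+ * that accept user submissions, report IB alignment requirements, the IP
+ * version and, for user queue capable IPs, the number of queue slots.
+ */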
+static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_hw_ip *result)
+{
+ uint32_t ib_start_alignment = 0;
+ uint32_t ib_size_alignment = 0;
+ enum amd_ip_block_type type;
+ unsigned int num_rings = 0;
+ uint32_t num_slots = 0;
+ unsigned int i, j;
+
+ if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
+ return -EINVAL;
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ if (adev->gfx.gfx_ring[i].sched.ready &&
+ !adev->gfx.gfx_ring[i].no_user_submission)
+ ++num_rings;
+
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
+ num_slots += hweight32(adev->mes.gfx_hqd_mask[i]);
+ }
+
+ ib_start_alignment = 32;
+ ib_size_alignment = 32;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ if (adev->gfx.compute_ring[i].sched.ready &&
+ !adev->gfx.compute_ring[i].no_user_submission)
+ ++num_rings;
+
+ if (!adev->gfx.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++)
+ num_slots += hweight32(adev->mes.compute_hqd_mask[i]);
+ }
+
+ ib_start_alignment = 32;
+ ib_size_alignment = 32;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ if (adev->sdma.instance[i].ring.sched.ready &&
+ !adev->sdma.instance[i].ring.no_user_submission)
+ ++num_rings;
+
+ if (!adev->sdma.disable_uq) {
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
+ num_slots += hweight32(adev->mes.sdma_hqd_mask[i]);
+ }
+
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+
+ if (adev->uvd.inst[i].ring.sched.ready &&
+ !adev->uvd.inst[i].ring.no_user_submission)
+ ++num_rings;
+ }
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ for (i = 0; i < adev->vce.num_rings; i++)
+ if (adev->vce.ring[i].sched.ready &&
+ !adev->vce.ring[i].no_user_submission)
+ ++num_rings;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->uvd.num_enc_rings; j++)
+ if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
+ !adev->uvd.inst[i].ring_enc[j].no_user_submission)
+ ++num_rings;
+ }
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ if (adev->vcn.inst[i].ring_dec.sched.ready &&
+ !adev->vcn.inst[i].ring_dec.no_user_submission)
+ ++num_rings;
+ }
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
+ if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
+ !adev->vcn.inst[i].ring_enc[j].no_user_submission)
+ ++num_rings;
+ }
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
+ AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
+ if (adev->jpeg.inst[i].ring_dec[j].sched.ready &&
+ !adev->jpeg.inst[i].ring_dec[j].no_user_submission)
+ ++num_rings;
+ }
+ ib_start_alignment = 256;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VPE:
+ type = AMD_IP_BLOCK_TYPE_VPE;
+ if (adev->vpe.ring.sched.ready &&
+ !adev->vpe.ring.no_user_submission)
+ ++num_rings;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->type == type &&
+ adev->ip_blocks[i].status.valid)
+ break;
+
+ if (i == adev->num_ip_blocks)
+ return 0;
+
+ num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
+ num_rings);
+
+ result->hw_ip_version_major = adev->ip_blocks[i].version->major;
+ result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, GC_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, SDMA0_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ case AMD_IP_BLOCK_TYPE_VCN:
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, UVD_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_VCE:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCE_HWIP, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_VPE:
+ result->ip_discovery_version =
+ IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0));
+ break;
+ default:
+ result->ip_discovery_version = 0;
+ break;
+ }
+ } else {
+ result->ip_discovery_version = 0;
+ }
+ result->capabilities_flags = 0;
+ result->available_rings = (1 << num_rings) - 1;
+ result->userq_num_slots = num_slots;
+ result->ib_start_alignment = ib_start_alignment;
+ result->ib_size_alignment = ib_size_alignment;
+ return 0;
+}
+
+/*
+ * Userspace get information ioctl
+ */
+/**
+ * amdgpu_info_ioctl - answer a device specific request.
+ *
+ * @dev: drm device pointer
+ * @data: request object
+ * @filp: drm filp
+ *
+ * This function is used to pass device-specific parameters to the userspace
+ * drivers. Examples include: PCI device id, pipeline params, tiling params,
+ * etc. (all asics).
+ * Returns 0 on success, negative error code on failure.
+ */
+int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_amdgpu_info *info = data;
+ struct amdgpu_mode_info *minfo = &adev->mode_info;
+ void __user *out = (void __user *)(uintptr_t)info->return_pointer;
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ip_block *ip_block;
+ enum amd_ip_block_type type;
+ struct amdgpu_xcp *xcp;
+ u32 count, inst_mask;
+ uint32_t size = info->return_size;
+ struct drm_crtc *crtc;
+ uint32_t ui32 = 0;
+ uint64_t ui64 = 0;
+ int i, found, ret;
+ int ui32_size = sizeof(ui32);
+
+ if (!info->return_size || !info->return_pointer)
+ return -EINVAL;
+
+ switch (info->query) {
+ case AMDGPU_INFO_ACCEL_WORKING:
+ ui32 = adev->accel_working;
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_CRTC_FROM_ID:
+ for (i = 0, found = 0; i < adev->mode_info.num_crtc; i++) {
+ crtc = (struct drm_crtc *)minfo->crtcs[i];
+ if (crtc && crtc->base.id == info->mode_crtc.id) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ ui32 = amdgpu_crtc->crtc_id;
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ DRM_DEBUG_KMS("unknown crtc id %d\n", info->mode_crtc.id);
+ return -EINVAL;
+ }
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_HW_IP_INFO: {
+ struct drm_amdgpu_info_hw_ip ip = {};
+
+ ret = amdgpu_hw_ip_info(adev, info, &ip);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &ip, min_t(size_t, size, sizeof(ip)));
+ return ret ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_HW_IP_COUNT: {
+ fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
+ type = amdgpu_ip_get_block_type(adev, info->query_hw_ip.type);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, type);
+
+ if (!ip_block || !ip_block->status.valid)
+ return -EINVAL;
+
+ if (adev->xcp_mgr && adev->xcp_mgr->num_xcps > 0 &&
+ fpriv->xcp_id < adev->xcp_mgr->num_xcps) {
+ xcp = &adev->xcp_mgr->xcp[fpriv->xcp_id];
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_SDMA, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask) * adev->jpeg.num_jpeg_rings;
+ break;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
+ if (ret)
+ return ret;
+ count = hweight32(inst_mask);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
+ }
+
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ case AMD_IP_BLOCK_TYPE_VCE:
+ count = 1;
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ count = adev->sdma.num_instances;
+ break;
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ count = adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings;
+ break;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ count = adev->vcn.num_vcn_inst;
+ break;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ count = adev->uvd.num_uvd_inst;
+ break;
+ /* For all other IP block types not listed in the switch statement
+ * the ip status is valid here and the instance count is one.
+ */
+ default:
+ count = 1;
+ break;
+ }
+
+ return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_TIMESTAMP:
+ ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_FW_VERSION: {
+ struct drm_amdgpu_info_firmware fw_info;
+
+ /* We only support one instance of each IP block right now. */
+ if (info->query_fw.ip_instance != 0)
+ return -EINVAL;
+
+ ret = amdgpu_firmware_info(&fw_info, &info->query_fw, adev);
+ if (ret)
+ return ret;
+
+ return copy_to_user(out, &fw_info,
+ min((size_t)size, sizeof(fw_info))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_NUM_BYTES_MOVED:
+ ui64 = atomic64_read(&adev->num_bytes_moved);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_NUM_EVICTIONS:
+ ui64 = atomic64_read(&adev->num_evictions);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS:
+ ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_VRAM_USAGE:
+ ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0;
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_VIS_VRAM_USAGE:
+ ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_GTT_USAGE:
+ ui64 = ttm_resource_manager_usage(&adev->mman.gtt_mgr.manager);
+ return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_GDS_CONFIG: {
+ struct drm_amdgpu_info_gds gds_info;
+
+ memset(&gds_info, 0, sizeof(gds_info));
+ gds_info.compute_partition_size = adev->gds.gds_size;
+ gds_info.gds_total_size = adev->gds.gds_size;
+ gds_info.gws_per_compute_partition = adev->gds.gws_size;
+ gds_info.oa_per_compute_partition = adev->gds.oa_size;
+ return copy_to_user(out, &gds_info,
+ min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VRAM_GTT: {
+ struct drm_amdgpu_info_vram_gtt vram_gtt;
+
+ vram_gtt.vram_size = adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size) -
+ AMDGPU_VM_RESERVED_VRAM;
+ vram_gtt.vram_cpu_accessible_size =
+ min(adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size),
+ vram_gtt.vram_size);
+ vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
+ vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
+ return copy_to_user(out, &vram_gtt,
+ min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_MEMORY: {
+ struct drm_amdgpu_memory_info mem;
+ struct ttm_resource_manager *gtt_man =
+ &adev->mman.gtt_mgr.manager;
+ struct ttm_resource_manager *vram_man =
+ &adev->mman.vram_mgr.manager;
+
+ memset(&mem, 0, sizeof(mem));
+ mem.vram.total_heap_size = adev->gmc.real_vram_size;
+ mem.vram.usable_heap_size = adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size) -
+ AMDGPU_VM_RESERVED_VRAM;
+ mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(vram_man) : 0;
+ mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
+
+ mem.cpu_accessible_vram.total_heap_size =
+ adev->gmc.visible_vram_size;
+ mem.cpu_accessible_vram.usable_heap_size =
+ min(adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size),
+ mem.vram.usable_heap_size);
+ mem.cpu_accessible_vram.heap_usage =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
+ mem.cpu_accessible_vram.max_allocation =
+ mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
+
+ mem.gtt.total_heap_size = gtt_man->size;
+ mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
+ atomic64_read(&adev->gart_pin_size);
+ mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
+ mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
+
+ return copy_to_user(out, &mem,
+ min((size_t)size, sizeof(mem)))
+ ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_READ_MMR_REG: {
+ int ret = 0;
+ unsigned int n, alloc_size;
+ uint32_t *regs;
+ unsigned int se_num = (info->read_mmr_reg.instance >>
+ AMDGPU_INFO_MMR_SE_INDEX_SHIFT) &
+ AMDGPU_INFO_MMR_SE_INDEX_MASK;
+ unsigned int sh_num = (info->read_mmr_reg.instance >>
+ AMDGPU_INFO_MMR_SH_INDEX_SHIFT) &
+ AMDGPU_INFO_MMR_SH_INDEX_MASK;
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -ENOENT;
+
+ /* set full masks if userspace set all bits
+ * in the bitfields
+ */
+ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) {
+ se_num = 0xffffffff;
+ } else if (se_num >= AMDGPU_GFX_MAX_SE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK) {
+ sh_num = 0xffffffff;
+ } else if (sh_num >= AMDGPU_GFX_MAX_SH_PER_SE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (info->read_mmr_reg.count > 128) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);
+ if (!regs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ alloc_size = info->read_mmr_reg.count * sizeof(*regs);
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < info->read_mmr_reg.count; i++) {
+ if (amdgpu_asic_read_register(adev, se_num, sh_num,
+ info->read_mmr_reg.dword_offset + i,
+ &regs[i])) {
+ DRM_DEBUG_KMS("unallowed offset %#x\n",
+ info->read_mmr_reg.dword_offset + i);
+ kfree(regs);
+ amdgpu_gfx_off_ctrl(adev, true);
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+ n = copy_to_user(out, regs, min(size, alloc_size));
+ kfree(regs);
+ ret = (n ? -EFAULT : 0);
+out:
+ up_read(&adev->reset_domain->sem);
+ return ret;
+ }
+ case AMDGPU_INFO_DEV_INFO: {
+ struct drm_amdgpu_info_device *dev_info;
+ uint64_t vm_size;
+ uint32_t pcie_gen_mask, pcie_width_mask;
+
+ dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
+ if (!dev_info)
+ return -ENOMEM;
+
+ dev_info->device_id = adev->pdev->device;
+ dev_info->chip_rev = adev->rev_id;
+ dev_info->external_rev = adev->external_rev_id;
+ dev_info->pci_rev = adev->pdev->revision;
+ dev_info->family = adev->family;
+ dev_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+ dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+ /* return all clocks in KHz */
+ dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
+ if (adev->pm.dpm_enabled) {
+ dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
+ dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
+ dev_info->min_engine_clock = amdgpu_dpm_get_sclk(adev, true) * 10;
+ dev_info->min_memory_clock = amdgpu_dpm_get_mclk(adev, true) * 10;
+ } else {
+ dev_info->max_engine_clock =
+ dev_info->min_engine_clock =
+ adev->clock.default_sclk * 10;
+ dev_info->max_memory_clock =
+ dev_info->min_memory_clock =
+ adev->clock.default_mclk * 10;
+ }
+ dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
+ dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines;
+ dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
+ dev_info->ids_flags = 0;
+ if (adev->flags & AMD_IS_APU)
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
+ if (adev->gfx.mcbp)
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
+ if (amdgpu_is_tmz(adev))
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
+ if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
+
+ /* Gang submit is not supported under SRIOV currently */
+ if (!amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= AMDGPU_IDS_FLAGS_GANG_SUBMIT;
+
+ if (amdgpu_passthrough(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+ else if (amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_VF <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+
+ vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_TOP;
+
+ /* Older VCE FW versions are buggy and can handle only 40bits */
+ if (adev->vce.fw_version &&
+ adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
+ vm_size = min(vm_size, 1ULL << 40);
+
+ dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM;
+ dev_info->virtual_address_max =
+ min(vm_size, AMDGPU_GMC_HOLE_START);
+
+ if (vm_size > AMDGPU_GMC_HOLE_START) {
+ dev_info->high_va_offset = AMDGPU_GMC_HOLE_END;
+ dev_info->high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
+ }
+ dev_info->virtual_address_alignment = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
+ dev_info->pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
+ dev_info->gart_page_size = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
+ dev_info->cu_active_number = adev->gfx.cu_info.number;
+ dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
+ dev_info->ce_ram_size = adev->gfx.ce_ram_size;
+ memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],
+ sizeof(adev->gfx.cu_info.ao_cu_bitmap));
+ memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
+ sizeof(dev_info->cu_bitmap));
+ dev_info->vram_type = adev->gmc.vram_type;
+ dev_info->vram_bit_width = adev->gmc.vram_width;
+ dev_info->vce_harvest_config = adev->vce.harvest_config;
+ dev_info->gc_double_offchip_lds_buf =
+ adev->gfx.config.double_offchip_lds_buf;
+ dev_info->wave_front_size = adev->gfx.cu_info.wave_front_size;
+ dev_info->num_shader_visible_vgprs = adev->gfx.config.max_gprs;
+ dev_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+ dev_info->num_tcc_blocks = adev->gfx.config.max_texture_channel_caches;
+ dev_info->gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth;
+ dev_info->gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth;
+ dev_info->max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads;
+
+ if (adev->family >= AMDGPU_FAMILY_NV)
+ dev_info->pa_sc_tile_steering_override =
+ adev->gfx.config.pa_sc_tile_steering_override;
+
+ dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
+
+ /* Combine the chip gen mask with the platform (CPU/mobo) mask. */
+ pcie_gen_mask = adev->pm.pcie_gen_mask &
+ (adev->pm.pcie_gen_mask >> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT);
+ pcie_width_mask = adev->pm.pcie_mlw_mask &
+ (adev->pm.pcie_mlw_mask >> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT);
+ dev_info->pcie_gen = fls(pcie_gen_mask);
+ dev_info->pcie_num_lanes =
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
+
+ dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size;
+ dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp;
+ dev_info->sqc_data_cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ dev_info->sqc_inst_cache_size = adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ dev_info->gl1c_cache_size = adev->gfx.config.gc_gl1c_size_per_instance *
+ adev->gfx.config.gc_gl1c_per_sa;
+ dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+ dev_info->mall_size = adev->gmc.mall_size;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow_info;
+
+ ret = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info);
+ if (!ret) {
+ dev_info->shadow_size = shadow_info.shadow_size;
+ dev_info->shadow_alignment = shadow_info.shadow_alignment;
+ dev_info->csa_size = shadow_info.csa_size;
+ dev_info->csa_alignment = shadow_info.csa_alignment;
+ }
+ }
+
+ dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+
+ ret = copy_to_user(out, dev_info,
+ min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
+ kfree(dev_info);
+ return ret;
+ }
+ case AMDGPU_INFO_VCE_CLOCK_TABLE: {
+ unsigned int i;
+ struct drm_amdgpu_info_vce_clock_table vce_clk_table = {};
+ struct amd_vce_state *vce_state;
+
+ for (i = 0; i < AMDGPU_VCE_CLOCK_TABLE_ENTRIES; i++) {
+ vce_state = amdgpu_dpm_get_vce_clock_state(adev, i);
+ if (vce_state) {
+ vce_clk_table.entries[i].sclk = vce_state->sclk;
+ vce_clk_table.entries[i].mclk = vce_state->mclk;
+ vce_clk_table.entries[i].eclk = vce_state->evclk;
+ vce_clk_table.num_valid_entries++;
+ }
+ }
+
+ return copy_to_user(out, &vce_clk_table,
+ min((size_t)size, sizeof(vce_clk_table))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VBIOS: {
+ uint32_t bios_size = adev->bios_size;
+
+ switch (info->vbios_info.type) {
+ case AMDGPU_INFO_VBIOS_SIZE:
+ return copy_to_user(out, &bios_size,
+ min((size_t)size, sizeof(bios_size)))
+ ? -EFAULT : 0;
+ case AMDGPU_INFO_VBIOS_IMAGE: {
+ uint8_t *bios;
+ uint32_t bios_offset = info->vbios_info.offset;
+
+ if (bios_offset >= bios_size)
+ return -EINVAL;
+
+ bios = adev->bios + bios_offset;
+ return copy_to_user(out, bios,
+ min((size_t)size, (size_t)(bios_size - bios_offset)))
+ ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VBIOS_INFO: {
+ struct drm_amdgpu_info_vbios vbios_info = {};
+ struct atom_context *atom_context;
+
+ atom_context = adev->mode_info.atom_context;
+ if (atom_context) {
+ memcpy(vbios_info.name, atom_context->name,
+ sizeof(atom_context->name));
+ memcpy(vbios_info.vbios_pn, atom_context->vbios_pn,
+ sizeof(atom_context->vbios_pn));
+ vbios_info.version = atom_context->version;
+ memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str,
+ sizeof(atom_context->vbios_ver_str));
+ memcpy(vbios_info.date, atom_context->date,
+ sizeof(atom_context->date));
+ }
+
+ return copy_to_user(out, &vbios_info,
+ min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0;
+ }
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n",
+ info->vbios_info.type);
+ return -EINVAL;
+ }
+ }
+ case AMDGPU_INFO_NUM_HANDLES: {
+ struct drm_amdgpu_info_num_handles handle;
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_UVD:
+ /* Starting with Polaris, we support unlimited UVD handles */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ handle.uvd_max_handles = adev->uvd.max_handles;
+ handle.uvd_used_handles = amdgpu_uvd_used_handles(adev);
+
+ return copy_to_user(out, &handle,
+ min((size_t)size, sizeof(handle))) ? -EFAULT : 0;
+ } else {
+ return -ENODATA;
+ }
+
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ case AMDGPU_INFO_SENSOR: {
+ if (!adev->pm.dpm_enabled)
+ return -ENOENT;
+
+ switch (info->sensor_info.type) {
+ case AMDGPU_INFO_SENSOR_GFX_SCLK:
+ /* get sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GFX_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_GFX_MCLK:
+ /* get mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GFX_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_TEMP:
+ /* get temperature in millidegrees C */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_TEMP,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_LOAD:
+ /* get GPU load */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_LOAD,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_AVG_POWER:
+ /* get average GPU power */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_AVG_POWER,
+ (void *)&ui32, &ui32_size)) {
+ /* fall back to input power for backwards compat */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ }
+ ui32 >>= 8;
+ break;
+ case AMDGPU_INFO_SENSOR_GPU_INPUT_POWER:
+ /* get input GPU power */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_GPU_INPUT_POWER,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 >>= 8;
+ break;
+ case AMDGPU_INFO_SENSOR_VDDNB:
+ /* get VDDNB in millivolts */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_VDDNB,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_VDDGFX:
+ /* get VDDGFX in millivolts */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_VDDGFX,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK:
+ /* get stable pstate sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK:
+ /* get stable pstate mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK:
+ /* get peak pstate sclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK:
+ /* get peak pstate mclk in Mhz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n",
+ info->sensor_info.type);
+ return -EINVAL;
+ }
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VRAM_LOST_COUNTER:
+ ui32 = atomic_read(&adev->vram_lost_counter);
+ return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+ case AMDGPU_INFO_RAS_ENABLED_FEATURES: {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ uint64_t ras_mask;
+
+ if (!ras)
+ return -EINVAL;
+ ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features;
+
+ return copy_to_user(out, &ras_mask,
+ min_t(u64, size, sizeof(ras_mask))) ?
+ -EFAULT : 0;
+ }
+ case AMDGPU_INFO_VIDEO_CAPS: {
+ const struct amdgpu_video_codecs *codecs;
+ struct drm_amdgpu_info_video_caps *caps;
+ int r;
+
+ if (!adev->asic_funcs->query_video_codecs)
+ return -EINVAL;
+
+ switch (info->video_cap.type) {
+ case AMDGPU_INFO_VIDEO_CAPS_DECODE:
+ r = amdgpu_asic_query_video_codecs(adev, false, &codecs);
+ if (r)
+ return -EINVAL;
+ break;
+ case AMDGPU_INFO_VIDEO_CAPS_ENCODE:
+ r = amdgpu_asic_query_video_codecs(adev, true, &codecs);
+ if (r)
+ return -EINVAL;
+ break;
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n",
+ info->video_cap.type);
+ return -EINVAL;
+ }
+
+ caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+ if (!caps)
+ return -ENOMEM;
+
+ for (i = 0; i < codecs->codec_count; i++) {
+ int idx = codecs->codec_array[i].codec_type;
+
+ switch (idx) {
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9:
+ case AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1:
+ caps->codec_info[idx].valid = 1;
+ caps->codec_info[idx].max_width =
+ codecs->codec_array[i].max_width;
+ caps->codec_info[idx].max_height =
+ codecs->codec_array[i].max_height;
+ caps->codec_info[idx].max_pixels_per_frame =
+ codecs->codec_array[i].max_pixels_per_frame;
+ caps->codec_info[idx].max_level =
+ codecs->codec_array[i].max_level;
+ break;
+ default:
+ break;
+ }
+ }
+ r = copy_to_user(out, caps,
+ min((size_t)size, sizeof(*caps))) ? -EFAULT : 0;
+ kfree(caps);
+ return r;
+ }
+ case AMDGPU_INFO_MAX_IBS: {
+ uint32_t max_ibs[AMDGPU_HW_IP_NUM];
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ max_ibs[i] = amdgpu_ring_max_ibs(i);
+
+ return copy_to_user(out, max_ibs,
+ min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_GPUVM_FAULT: {
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct drm_amdgpu_info_gpuvm_fault gpuvm_fault;
+ unsigned long flags;
+
+ if (!vm)
+ return -EINVAL;
+
+ memset(&gpuvm_fault, 0, sizeof(gpuvm_fault));
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ gpuvm_fault.addr = vm->fault_info.addr;
+ gpuvm_fault.status = vm->fault_info.status;
+ gpuvm_fault.vmhub = vm->fault_info.vmhub;
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+ return copy_to_user(out, &gpuvm_fault,
+ min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
+ }
+ case AMDGPU_INFO_UQ_FW_AREAS: {
+ struct drm_amdgpu_info_uq_metadata meta_info = {};
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return ret;
+ default:
+ return -EINVAL;
+ }
+ }
+ default:
+ DRM_DEBUG_KMS("Invalid request %d\n", info->query);
+ return -EINVAL;
+ }
+ return 0;
+}
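
/*
 * Illustrative sketch, not part of this patch (helper name hypothetical):
 * for AMDGPU_INFO_RAS_ENABLED_FEATURES the ioctl above packs the enabled RAS
 * block bits into the upper 32 bits of the returned value and the currently
 * active feature bits into the lower 32, so a userspace consumer could split
 * the result like this.
 */
#include <stdint.h>

static inline void example_split_ras_mask(uint64_t ras_mask,
                                          uint32_t *enabled_blocks,
                                          uint32_t *active_features)
{
        *enabled_blocks  = (uint32_t)(ras_mask >> 32);        /* adev->ras_enabled */
        *active_features = (uint32_t)(ras_mask & 0xffffffff); /* ras->features */
}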
+
+/**
+ * amdgpu_driver_open_kms - drm callback for open
+ *
+ * @dev: drm dev pointer
+ * @file_priv: drm file
+ *
+ * On device open, initialize the per-file VM and driver state (all ASICs).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv;
+ int r, pasid;
+
+ /* Ensure IB tests are run on ring */
+ flush_delayed_work(&adev->delayed_init_work);
+
+ if (amdgpu_ras_intr_triggered()) {
+ DRM_ERROR("RAS Intr triggered, device disabled!!");
+ return -EHWPOISON;
+ }
+
+ file_priv->driver_priv = NULL;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0)
+ goto pm_put;
+
+ fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
+ if (unlikely(!fpriv)) {
+ r = -ENOMEM;
+ goto out_suspend;
+ }
+
+ pasid = amdgpu_pasid_alloc(16);
+ if (pasid < 0) {
+ dev_warn(adev->dev, "No more PASIDs available!");
+ pasid = 0;
+ }
+
+ r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
+ if (r)
+ goto error_pasid;
+
+ amdgpu_debugfs_vm_init(file_priv);
+
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
+ if (r)
+ goto error_pasid;
+
+ fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
+ if (!fpriv->prt_va) {
+ r = -ENOMEM;
+ goto error_vm;
+ }
+
+ if (adev->gfx.mcbp) {
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
+ &fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE);
+ if (r)
+ goto error_vm;
+ }
+
+ r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va);
+ if (r)
+ goto error_vm;
+
+ mutex_init(&fpriv->bo_list_lock);
+ idr_init_base(&fpriv->bo_list_handles, 1);
+
+ r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev);
+ if (r)
+ DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
+
+ r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
+ if (r)
+ goto error_vm;
+
+ amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
+
+ file_priv->driver_priv = fpriv;
+ goto out_suspend;
+
+error_vm:
+ amdgpu_vm_fini(adev, &fpriv->vm);
+
+error_pasid:
+ if (pasid)
+ amdgpu_pasid_free(pasid);
+
+ kfree(fpriv);
+
+out_suspend:
+pm_put:
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return r;
+}
+
+/**
+ * amdgpu_driver_postclose_kms - drm callback for post close
+ *
+ * @dev: drm dev pointer
+ * @file_priv: drm file
+ *
+ * On device post close, tear down the per-file VM and driver state (all ASICs).
+ */
+void amdgpu_driver_postclose_kms(struct drm_device *dev,
+ struct drm_file *file_priv)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+ struct amdgpu_bo_list *list;
+ struct amdgpu_bo *pd;
+ u32 pasid;
+ int handle;
+
+ if (!fpriv)
+ return;
+
+ pm_runtime_get_sync(dev->dev);
+
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
+ amdgpu_uvd_free_handles(adev, file_priv);
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
+ amdgpu_vce_free_handles(adev, file_priv);
+
+ if (fpriv->csa_va) {
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
+ fpriv->csa_va, csa_addr));
+ fpriv->csa_va = NULL;
+ }
+
+ amdgpu_seq64_unmap(adev, fpriv);
+
+ pasid = fpriv->vm.pasid;
+ pd = amdgpu_bo_ref(fpriv->vm.root.bo);
+ if (!WARN_ON(amdgpu_bo_reserve(pd, true))) {
+ amdgpu_vm_bo_del(adev, fpriv->prt_va);
+ amdgpu_bo_unreserve(pd);
+ }
+
+ amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+ amdgpu_vm_fini(adev, &fpriv->vm);
+
+ if (pasid)
+ amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid);
+ amdgpu_bo_unref(&pd);
+
+ idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
+ amdgpu_bo_list_put(list);
+
+ idr_destroy(&fpriv->bo_list_handles);
+ mutex_destroy(&fpriv->bo_list_lock);
+
+ kfree(fpriv);
+ file_priv->driver_priv = NULL;
+
+ pm_runtime_put_autosuspend(dev->dev);
+}
+
+void amdgpu_driver_release_kms(struct drm_device *dev)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ amdgpu_device_fini_sw(adev);
+ pci_set_drvdata(adev->pdev, NULL);
+}
+
+/*
+ * VBlank related functions.
+ */
+/**
+ * amdgpu_get_vblank_counter_kms - get frame count
+ *
+ * @crtc: crtc to get the frame count from
+ *
+ * Gets the frame count on the requested crtc (all asics).
+ * Returns frame count on success, -EINVAL on failure.
+ */
+u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int vpos, hpos, stat;
+ u32 count;
+
+ if (pipe >= adev->mode_info.num_crtc) {
+ DRM_ERROR("Invalid crtc %u\n", pipe);
+ return -EINVAL;
+ }
+
+ /* The hw increments its frame counter at start of vsync, not at start
+ * of vblank, as is required by DRM core vblank counter handling.
+ * Cook the hw count here to make it appear to the caller as if it
+ * incremented at start of vblank. We measure the distance to start of
+ * vblank in vpos. vpos will therefore be >= 0 between start of vblank
+ * and start of vsync, so vpos >= 0 means the hw frame counter result
+ * must be bumped by 1 to give the proper appearance to the caller.
+ */
+ if (adev->mode_info.crtcs[pipe]) {
+ /* Repeat the readout if needed to provide a stable result in
+ * case we cross the start of vsync during the queries.
+ */
+ do {
+ count = amdgpu_display_vblank_get_counter(adev, pipe);
+ /* Ask amdgpu_display_get_crtc_scanoutpos to return
+ * vpos as distance to start of vblank, instead of
+ * regular vertical scanout pos.
+ */
+ stat = amdgpu_display_get_crtc_scanoutpos(
+ dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
+ &vpos, &hpos, NULL, NULL,
+ &adev->mode_info.crtcs[pipe]->base.hwmode);
+ } while (count != amdgpu_display_vblank_get_counter(adev, pipe));
+
+ if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+ (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) {
+ DRM_DEBUG_VBL("Query failed! stat %d\n", stat);
+ } else {
+ DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n",
+ pipe, vpos);
+
+ /* Bump counter if we are at >= leading edge of vblank,
+ * but before vsync where vpos would turn negative and
+ * the hw counter really increments.
+ */
+ if (vpos >= 0)
+ count++;
+ }
+ } else {
+ /* Fallback to use value as is. */
+ count = amdgpu_display_vblank_get_counter(adev, pipe);
+ DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n");
+ }
+
+ return count;
+}
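
/*
 * Illustrative sketch, not part of this patch (helper name hypothetical):
 * the counter cooking above boils down to one rule -- when the scanout query
 * is valid and vpos is at or past the leading edge of vblank (vpos >= 0) but
 * vsync has not been reached yet, report hw_count + 1 so the counter appears
 * to increment at vblank start as the DRM core expects.
 */
static inline u32 example_cook_vblank_count(u32 hw_count, int vpos,
                                            bool query_valid)
{
        if (!query_valid)
                return hw_count;        /* fall back to the raw hardware count */
        return (vpos >= 0) ? hw_count + 1 : hw_count;
}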
+
+/**
+ * amdgpu_enable_vblank_kms - enable vblank interrupt
+ *
+ * @crtc: crtc to enable vblank interrupt for
+ *
+ * Enable the interrupt on the requested crtc (all asics).
+ * Returns 0 on success, -EINVAL on failure.
+ */
+int amdgpu_enable_vblank_kms(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
+
+ return amdgpu_irq_get(adev, &adev->crtc_irq, idx);
+}
+
+/**
+ * amdgpu_disable_vblank_kms - disable vblank interrupt
+ *
+ * @crtc: crtc to disable vblank interrupt for
+ *
+ * Disable the interrupt on the requested crtc (all asics).
+ */
+void amdgpu_disable_vblank_kms(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ unsigned int pipe = crtc->index;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
+
+ amdgpu_irq_put(adev, &adev->crtc_irq, idx);
+}
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ struct drm_amdgpu_info_firmware fw_info;
+ struct drm_amdgpu_query_fw query_fw;
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
+ int ret, i;
+
+ static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = {
+#define TA_FW_NAME(type)[TA_FW_TYPE_PSP_##type] = #type
+ TA_FW_NAME(XGMI),
+ TA_FW_NAME(RAS),
+ TA_FW_NAME(HDCP),
+ TA_FW_NAME(DTM),
+ TA_FW_NAME(RAP),
+ TA_FW_NAME(SECUREDISPLAY),
+#undef TA_FW_NAME
+ };
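
/*
 * Illustrative aside, not part of this patch: TA_FW_NAME() combines token
 * pasting and stringification, so TA_FW_NAME(RAS) expands to
 * [TA_FW_TYPE_PSP_RAS] = "RAS". The enum index and the printed name therefore
 * stay in sync from a single spelling of the TA type.
 */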
+
+ /* VCE */
+ query_fw.fw_type = AMDGPU_INFO_FW_VCE;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VCE feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* UVD */
+ query_fw.fw_type = AMDGPU_INFO_FW_UVD;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "UVD feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* GMC */
+ query_fw.fw_type = AMDGPU_INFO_FW_GMC;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* ME */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "ME feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* PFP */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_PFP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "PFP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* CE */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_CE;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "CE feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST CNTL */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST GPM MEM */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLC SAVE RESTORE LIST SRM MEM */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLCP */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* RLCV */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "RLCV feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MEC */
+ query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
+ query_fw.index = 0;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MEC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MEC2 */
+ if (adev->gfx.mec2_fw) {
+ query_fw.index = 1;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MEC2 feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+ }
+
+ /* IMU */
+ query_fw.fw_type = AMDGPU_INFO_FW_IMU;
+ query_fw.index = 0;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "IMU feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* PSP SOS */
+ query_fw.fw_type = AMDGPU_INFO_FW_SOS;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "SOS feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* PSP ASD */
+ query_fw.fw_type = AMDGPU_INFO_FW_ASD;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ query_fw.fw_type = AMDGPU_INFO_FW_TA;
+ for (i = TA_FW_TYPE_PSP_XGMI; i < TA_FW_TYPE_MAX_INDEX; i++) {
+ query_fw.index = i;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ continue;
+
+ seq_printf(m, "TA %s feature version: 0x%08x, firmware version: 0x%08x\n",
+ ta_fw_name[i], fw_info.feature, fw_info.ver);
+ }
+
+ /* SMC */
+ query_fw.fw_type = AMDGPU_INFO_FW_SMC;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ smu_program = (fw_info.ver >> 24) & 0xff;
+ smu_major = (fw_info.ver >> 16) & 0xff;
+ smu_minor = (fw_info.ver >> 8) & 0xff;
+ smu_debug = (fw_info.ver >> 0) & 0xff;
+ seq_printf(m, "SMC feature version: %u, program: %d, firmware version: 0x%08x (%d.%d.%d)\n",
+ fw_info.feature, smu_program, fw_info.ver, smu_major, smu_minor, smu_debug);
+
+ /* SDMA */
+ query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ query_fw.index = i;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+ i, fw_info.feature, fw_info.ver);
+ }
+
+ /* VCN */
+ query_fw.fw_type = AMDGPU_INFO_FW_VCN;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* DMCU */
+ query_fw.fw_type = AMDGPU_INFO_FW_DMCU;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* DMCUB */
+ query_fw.fw_type = AMDGPU_INFO_FW_DMCUB;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* TOC */
+ query_fw.fw_type = AMDGPU_INFO_FW_TOC;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "TOC feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* CAP */
+ if (adev->psp.cap_fw) {
+ query_fw.fw_type = AMDGPU_INFO_FW_CAP;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "CAP feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+ }
+
+ /* MES_KIQ */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES_KIQ;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES_KIQ feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* MES */
+ query_fw.fw_type = AMDGPU_INFO_FW_MES;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ /* VPE */
+ query_fw.fw_type = AMDGPU_INFO_FW_VPE;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VPE feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
+ seq_printf(m, "VBIOS version: %s\n", ctx->vbios_pn);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_firmware_info);
+
+#endif
+
+void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file("amdgpu_firmware_info", 0444, root,
+ adev, &amdgpu_debugfs_firmware_info_fops);
+
+#endif
+}
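
/*
 * Illustrative sketch, not part of this patch (helper name and example value
 * hypothetical): the SMC entry above unpacks the firmware version as four
 * bytes -- program, major, minor, debug, most to least significant. A made-up
 * fw_info.ver of 0x012a4b00 would print as program 1, version 42.75.0.
 */
static inline void example_unpack_smu_version(u32 ver, u8 out[4])
{
        out[0] = (ver >> 24) & 0xff;    /* program */
        out[1] = (ver >> 16) & 0xff;    /* major */
        out[2] = (ver >>  8) & 0xff;    /* minor */
        out[3] =  ver        & 0xff;    /* debug */
}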
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
new file mode 100644
index 000000000000..4d1d4994ea3f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_lsdma.h"
+
+#define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL
+
+int amdgpu_lsdma_wait_for(struct amdgpu_device *adev,
+ uint32_t reg_index, uint32_t reg_val,
+ uint32_t mask)
+{
+ uint32_t val;
+ int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ udelay(1);
+ }
+
+ return -ETIME;
+}
+
+int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev,
+ uint64_t src_addr,
+ uint64_t dst_addr,
+ uint64_t mem_size)
+{
+ int ret;
+
+ if (mem_size == 0)
+ return -EINVAL;
+
+ while (mem_size > 0) {
+ uint64_t current_copy_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
+
+ ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, current_copy_size);
+ if (ret)
+ return ret;
+ src_addr += current_copy_size;
+ dst_addr += current_copy_size;
+ mem_size -= current_copy_size;
+ }
+
+ return 0;
+}
+
+int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev,
+ uint64_t dst_addr,
+ uint32_t data,
+ uint64_t mem_size)
+{
+ int ret;
+
+ if (mem_size == 0)
+ return -EINVAL;
+
+ while (mem_size > 0) {
+ uint64_t current_fill_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE);
+
+ ret = adev->lsdma.funcs->fill_mem(adev, dst_addr, data, current_fill_size);
+ if (ret)
+ return ret;
+ dst_addr += current_fill_size;
+ mem_size -= current_fill_size;
+ }
+
+ return 0;
+}
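
/*
 * Illustrative sketch, not part of this patch (helper name hypothetical):
 * both helpers above split a request into chunks of at most
 * AMDGPU_LSDMA_MAX_SIZE (0x2000000, i.e. 32 MiB) and advance the addresses
 * between submissions, so a 100 MiB copy issues three full 32 MiB chunks plus
 * one 4 MiB tail -- four calls into adev->lsdma.funcs->copy_mem(). The chunk
 * count is a ceiling division:
 */
static inline u64 example_lsdma_chunk_count(u64 mem_size)
{
        const u64 max_chunk = 0x2000000ULL;     /* AMDGPU_LSDMA_MAX_SIZE */

        return (mem_size + max_chunk - 1) / max_chunk;
}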
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
new file mode 100644
index 000000000000..c61ba58c5ee0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_LSDMA_H__
+#define __AMDGPU_LSDMA_H__
+
+struct amdgpu_lsdma {
+ const struct amdgpu_lsdma_funcs *funcs;
+};
+
+struct amdgpu_lsdma_funcs {
+ int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr,
+ uint64_t dst_addr, uint64_t size);
+ int (*fill_mem)(struct amdgpu_device *adev, uint64_t dst_addr,
+ uint32_t data, uint64_t size);
+ void (*update_memory_power_gating)(struct amdgpu_device *adev, bool enable);
+};
+
+int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr,
+ uint64_t dst_addr, uint64_t mem_size);
+int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev, uint64_t dst_addr,
+ uint32_t data, uint64_t mem_size);
+int amdgpu_lsdma_wait_for(struct amdgpu_device *adev, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
new file mode 100644
index 000000000000..3ca03b5e0f91
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -0,0 +1,630 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu_ras.h"
+#include "amdgpu.h"
+#include "amdgpu_mca.h"
+
+#include "umc/umc_6_7_0_offset.h"
+#include "umc/umc_6_7_0_sh_mask.h"
+
+static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev,
+ uint64_t mc_status)
+{
+ if (adev->umc.ras->check_ecc_err_status)
+ return adev->umc.ras->check_ecc_err_status(adev,
+ AMDGPU_MCA_ERROR_TYPE_DE, &mc_status);
+
+ return false;
+}
+
+void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count)
+{
+ uint64_t mc_status = RREG64_PCIE(mc_status_addr);
+
+ if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
+
+void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count)
+{
+ uint64_t mc_status = RREG64_PCIE(mc_status_addr);
+
+ if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr)
+{
+ WREG64_PCIE(mc_status_addr, 0x0ULL);
+}
+
+void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count));
+ amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count));
+
+ amdgpu_mca_reset_error_count(adev, mc_status_addr);
+}
+
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mp0.ras)
+ return 0;
+
+ ras = adev->mca.mp0.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mp1.ras)
+ return 0;
+
+ ras = adev->mca.mp1.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mca_ras_block *ras;
+
+ if (!adev->mca.mpio.ras)
+ return 0;
+
+ ras = adev->mca.mpio.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mca.mpio ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mca.mpio");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mca.mpio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+static void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set)
+{
+ if (!mca_set)
+ return;
+
+ memset(mca_set, 0, sizeof(*mca_set));
+ INIT_LIST_HEAD(&mca_set->list);
+}
+
+static int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry)
+{
+ struct mca_bank_node *node;
+
+ if (!entry)
+ return -EINVAL;
+
+ node = kvzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ memcpy(&node->entry, entry, sizeof(*entry));
+
+ INIT_LIST_HEAD(&node->node);
+ list_add_tail(&node->node, &mca_set->list);
+
+ mca_set->nr_entries++;
+
+ return 0;
+}
+
+static int amdgpu_mca_bank_set_merge(struct mca_bank_set *mca_set, struct mca_bank_set *new)
+{
+ struct mca_bank_node *node;
+
+ list_for_each_entry(node, &new->list, node)
+ amdgpu_mca_bank_set_add_entry(mca_set, &node->entry);
+
+ return 0;
+}
+
+static void amdgpu_mca_bank_set_remove_node(struct mca_bank_set *mca_set, struct mca_bank_node *node)
+{
+ if (!node)
+ return;
+
+ list_del(&node->node);
+ kvfree(node);
+
+ mca_set->nr_entries--;
+}
+
+static void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set)
+{
+ struct mca_bank_node *node, *tmp;
+
+ if (list_empty(&mca_set->list))
+ return;
+
+ list_for_each_entry_safe(node, tmp, &mca_set->list, node)
+ amdgpu_mca_bank_set_remove_node(mca_set, node);
+}
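
/*
 * Illustrative sketch, not part of this patch (function name hypothetical):
 * the helpers above give mca_bank_set a simple lifecycle -- init a local set,
 * add entries, optionally merge leftovers into a longer-lived set, then
 * release whatever remains. That is the shape amdgpu_mca_smu_log_ras_error()
 * follows further down; the function below only re-traces it schematically.
 */
static int example_mca_set_lifecycle(struct mca_bank_set *cache,
                                     struct mca_bank_entry *entry)
{
        struct mca_bank_set set;
        int ret;

        amdgpu_mca_bank_set_init(&set);         /* empty list, nr_entries = 0 */
        ret = amdgpu_mca_bank_set_add_entry(&set, entry);
        if (!ret)
                ret = amdgpu_mca_bank_set_merge(cache, &set);   /* copy nodes into cache */
        amdgpu_mca_bank_set_release(&set);      /* free the local nodes */

        return ret;
}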
+
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+
+ mca->mca_funcs = mca_funcs;
+}
+
+int amdgpu_mca_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ struct mca_bank_cache *mca_cache;
+ int i;
+
+ atomic_set(&mca->ue_update_flag, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
+ mca_cache = &mca->mca_caches[i];
+ mutex_init(&mca_cache->lock);
+ amdgpu_mca_bank_set_init(&mca_cache->mca_set);
+ }
+
+ return 0;
+}
+
+void amdgpu_mca_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ struct mca_bank_cache *mca_cache;
+ int i;
+
+ atomic_set(&mca->ue_update_flag, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
+ mca_cache = &mca->mca_caches[i];
+ amdgpu_mca_bank_set_release(&mca_cache->mca_set);
+ mutex_destroy(&mca_cache->lock);
+ }
+}
+
+int amdgpu_mca_reset(struct amdgpu_device *adev)
+{
+ amdgpu_mca_fini(adev);
+
+ return amdgpu_mca_init(adev);
+}
+
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (mca_funcs && mca_funcs->mca_set_debug_mode)
+ return mca_funcs->mca_set_debug_mode(adev, enable);
+
+ return -EOPNOTSUPP;
+}
+
+static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry,
+ struct ras_query_context *qctx)
+{
+ u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
+
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_STATUS]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_ADDR]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_MISC0]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_IPID]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_SYND]);
+}
+
+static int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!count)
+ return -EINVAL;
+
+ if (mca_funcs && mca_funcs->mca_get_valid_mca_count)
+ return mca_funcs->mca_get_valid_mca_count(adev, type, count);
+
+ return -EOPNOTSUPP;
+}
+
+static int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ int count;
+
+ if (!mca_funcs || !mca_funcs->mca_get_mca_entry)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ count = mca_funcs->max_ue_count;
+ break;
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ count = mca_funcs->max_ce_count;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (idx >= count)
+ return -EINVAL;
+
+ return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
+}
+
+static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgpu_mca_error_type type)
+{
+ struct amdgpu_mca *mca = &adev->mca;
+ bool ret = true;
+
+ /*
+ * The UE valid MCA count is only cleared after reset, so to avoid
+ * counting the same errors repeatedly, the MCA banks are only
+ * updated once during the GPU recovery stage.
+ */
+ if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
+ if (amdgpu_ras_intr_triggered())
+ ret = atomic_cmpxchg(&mca->ue_update_flag, 0, 1) == 0;
+ else
+ atomic_set(&mca->ue_update_flag, 0);
+ }
+
+ return ret;
+}
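
/*
 * Illustrative sketch, not part of this patch (helper name hypothetical):
 * the UE branch above is the usual "run once per episode" idiom --
 * atomic_cmpxchg() flips the flag from 0 to 1 and only the caller that
 * observed 0 proceeds; everyone else skips the update until the flag is
 * cleared again outside the recovery window.
 */
static inline bool example_claim_once(atomic_t *flag)
{
        /* true for exactly one caller until *flag is reset to 0 */
        return atomic_cmpxchg(flag, 0, 1) == 0;
}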
+
+static bool amdgpu_mca_bank_should_dump(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry)
+{
+ bool ret;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ ret = amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]);
+ break;
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ default:
+ ret = true;
+ break;
+ }
+
+ return ret;
+}
+
+static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set,
+ struct ras_query_context *qctx)
+{
+ struct mca_bank_entry entry;
+ uint32_t count = 0, i;
+ int ret;
+
+ if (!mca_set)
+ return -EINVAL;
+
+ if (!amdgpu_mca_bank_should_update(adev, type))
+ return 0;
+
+ ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < count; i++) {
+ memset(&entry, 0, sizeof(entry));
+ ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, &entry);
+ if (ret)
+ return ret;
+
+ amdgpu_mca_bank_set_add_entry(mca_set, &entry);
+
+ if (amdgpu_mca_bank_should_dump(adev, type, &entry))
+ amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx);
+ }
+
+ return 0;
+}
+
+static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
+{
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!count || !entry)
+ return -EINVAL;
+
+ if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count)
+ return -EOPNOTSUPP;
+
+ return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count);
+}
+
+static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct mca_bank_set *mca_set, struct ras_err_data *err_data)
+{
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ struct mca_bank_node *node, *tmp;
+ struct mca_bank_entry *entry;
+ uint32_t count;
+ int ret;
+
+ if (!mca_set)
+ return -EINVAL;
+
+ if (!mca_set->nr_entries)
+ return 0;
+
+ list_for_each_entry_safe(node, tmp, &mca_set->list, node) {
+ entry = &node->entry;
+
+ count = 0;
+ ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
+
+ if (!count)
+ continue;
+
+ memset(&mcm_info, 0, sizeof(mcm_info));
+
+ mcm_info.socket_id = entry->info.socket_id;
+ mcm_info.die_id = entry->info.aid;
+
+ if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
+ amdgpu_ras_error_statistic_ue_count(err_data,
+ &mcm_info, (uint64_t)count);
+ } else {
+ if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]))
+ amdgpu_ras_error_statistic_de_count(err_data,
+ &mcm_info, (uint64_t)count);
+ else
+ amdgpu_ras_error_statistic_ce_count(err_data,
+ &mcm_info, (uint64_t)count);
+ }
+
+ amdgpu_mca_bank_set_remove_node(mca_set, node);
+ }
+
+ return 0;
+}
+
+static int amdgpu_mca_add_mca_set_to_cache(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *new)
+{
+ struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type];
+ int ret;
+
+ mutex_lock(&mca_cache->lock);
+ ret = amdgpu_mca_bank_set_merge(&mca_cache->mca_set, new);
+ mutex_unlock(&mca_cache->lock);
+
+ return ret;
+}
+
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx)
+{
+ struct mca_bank_set mca_set;
+ struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type];
+ int ret;
+
+ amdgpu_mca_bank_set_init(&mca_set);
+
+ ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, qctx);
+ if (ret)
+ goto out_mca_release;
+
+ ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_set, err_data);
+ if (ret)
+ goto out_mca_release;
+
+ /* add remaining mca banks to the mca cache */
+ if (mca_set.nr_entries) {
+ ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
+ if (ret)
+ goto out_mca_release;
+ }
+
+ /* dispatch mca set again if mca cache has valid data */
+ mutex_lock(&mca_cache->lock);
+ if (mca_cache->mca_set.nr_entries)
+ ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_cache->mca_set, err_data);
+ mutex_unlock(&mca_cache->lock);
+
+out_mca_release:
+ amdgpu_mca_bank_set_release(&mca_set);
+
+ return ret;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ int ret;
+
+ ret = amdgpu_ras_set_mca_debug_mode(adev, val ? true : false);
+ if (ret)
+ return ret;
+
+ dev_info(adev->dev, "amdgpu set smu mca debug mode %s success\n", val ? "on" : "off");
+
+ return 0;
+}
+
+static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry)
+{
+ int i, idx = entry->idx;
+ int reg_idx_array[] = {
+ MCA_REG_IDX_STATUS,
+ MCA_REG_IDX_ADDR,
+ MCA_REG_IDX_MISC0,
+ MCA_REG_IDX_IPID,
+ MCA_REG_IDX_SYND,
+ };
+
+ seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE");
+ seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip);
+ seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+ idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype);
+
+ for (i = 0; i < ARRAY_SIZE(reg_idx_array); i++)
+ seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg_idx_array[i], entry->regs[reg_idx_array[i]]);
+}
+
+static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+ struct mca_bank_node *node;
+ struct mca_bank_set mca_set;
+ struct ras_query_context qctx;
+ int ret;
+
+ amdgpu_mca_bank_set_init(&mca_set);
+
+ qctx.evid.event_id = RAS_EVENT_INVALID_ID;
+ ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, &qctx);
+ if (ret)
+ goto err_free_mca_set;
+
+ seq_printf(m, "amdgpu smu %s valid mca count: %d\n",
+ type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", mca_set.nr_entries);
+
+ if (!mca_set.nr_entries)
+ goto err_free_mca_set;
+
+ list_for_each_entry(node, &mca_set.list, node)
+ mca_dump_entry(m, &node->entry);
+
+ /* add mca bank to mca bank cache */
+ ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
+
+err_free_mca_set:
+ amdgpu_mca_bank_set_release(&mca_set);
+
+ return ret;
+}
+
+static int mca_dump_ce_show(struct seq_file *m, void *unused)
+{
+ return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_CE);
+}
+
+static int mca_dump_ce_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mca_dump_ce_show, inode->i_private);
+}
+
+static const struct file_operations mca_ce_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = mca_dump_ce_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int mca_dump_ue_show(struct seq_file *m, void *unused)
+{
+ return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_UE);
+}
+
+static int mca_dump_ue_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mca_dump_ue_show, inode->i_private);
+}
+
+static const struct file_operations mca_ue_dump_debug_fops = {
+ .owner = THIS_MODULE,
+ .open = mca_dump_ue_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_set, "%llu\n");
+#endif
+
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{
+#if defined(CONFIG_DEBUG_FS)
+ if (!root)
+ return;
+
+ debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops);
+ debugfs_create_file("mca_ue_dump", 0400, root, adev, &mca_ue_dump_debug_fops);
+ debugfs_create_file("mca_ce_dump", 0400, root, adev, &mca_ce_dump_debug_fops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
new file mode 100644
index 000000000000..e80323ff90c1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_MCA_H__
+#define __AMDGPU_MCA_H__
+
+#include "amdgpu_ras.h"
+
+#define MCA_MAX_REGS_COUNT (16)
+
+#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
+#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63)
+#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62)
+#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61)
+#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60)
+#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59)
+#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58)
+#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57)
+#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56)
+#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55)
+#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53)
+#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46)
+#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45)
+#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44)
+#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43)
+#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40)
+#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32)
+#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24)
+#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16)
+#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0)
+
+#define MCA_REG__MISC0__ERRCNT(x) MCA_REG_FIELD(x, 43, 32)
+
+#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0)
+
+enum amdgpu_mca_ip {
+ AMDGPU_MCA_IP_UNKNOW = -1,
+ AMDGPU_MCA_IP_PSP = 0,
+ AMDGPU_MCA_IP_SDMA,
+ AMDGPU_MCA_IP_GC,
+ AMDGPU_MCA_IP_SMU,
+ AMDGPU_MCA_IP_MP5,
+ AMDGPU_MCA_IP_UMC,
+ AMDGPU_MCA_IP_PCS_XGMI,
+ AMDGPU_MCA_IP_COUNT,
+};
+
+enum amdgpu_mca_error_type {
+ AMDGPU_MCA_ERROR_TYPE_UE = 0,
+ AMDGPU_MCA_ERROR_TYPE_CE,
+ AMDGPU_MCA_ERROR_TYPE_DE,
+};
+
+struct amdgpu_mca_ras_block {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_mca_ras {
+ struct ras_common_if *ras_if;
+ struct amdgpu_mca_ras_block *ras;
+};
+
+struct mca_bank_set {
+ int nr_entries;
+ struct list_head list;
+};
+
+struct mca_bank_cache {
+ struct mca_bank_set mca_set;
+ struct mutex lock;
+};
+
+struct amdgpu_mca {
+ struct amdgpu_mca_ras mp0;
+ struct amdgpu_mca_ras mp1;
+ struct amdgpu_mca_ras mpio;
+ const struct amdgpu_mca_smu_funcs *mca_funcs;
+ struct mca_bank_cache mca_caches[AMDGPU_MCA_ERROR_TYPE_DE];
+ atomic_t ue_update_flag;
+};
+
+enum mca_reg_idx {
+ MCA_REG_IDX_STATUS = 1,
+ MCA_REG_IDX_ADDR = 2,
+ MCA_REG_IDX_MISC0 = 3,
+ MCA_REG_IDX_IPID = 5,
+ MCA_REG_IDX_SYND = 6,
+ MCA_REG_IDX_COUNT = 16,
+};
+
+struct mca_bank_info {
+ int socket_id;
+ int aid;
+ int hwid;
+ int mcatype;
+};
+
+struct mca_bank_entry {
+ int idx;
+ enum amdgpu_mca_error_type type;
+ enum amdgpu_mca_ip ip;
+ struct mca_bank_info info;
+ uint64_t regs[MCA_MAX_REGS_COUNT];
+};
+
+struct mca_bank_node {
+ struct mca_bank_entry entry;
+ struct list_head node;
+};
+
+struct amdgpu_mca_smu_funcs {
+ int max_ue_count;
+ int max_ce_count;
+ int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable);
+ int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry, uint32_t *count);
+ int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ uint32_t *count);
+ int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ int idx, struct mca_bank_entry *entry);
+};
+
+void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count);
+
+void amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ unsigned long *error_count);
+
+void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr);
+
+void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
+ uint64_t mc_status_addr,
+ void *ras_error_status);
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
+
+void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs);
+int amdgpu_mca_init(struct amdgpu_device *adev);
+void amdgpu_mca_fini(struct amdgpu_device *adev);
+int amdgpu_mca_reset(struct amdgpu_device *adev);
+int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum amdgpu_mca_error_type type, uint32_t *total);
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx);
+
+#endif
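
/*
 * Illustrative sketch, not part of this header (helper name hypothetical):
 * the MCA_REG__* accessors make bank classification a handful of bit tests.
 * The helper below mirrors the valid/uncorrectable/correctable distinctions
 * used by amdgpu_mca.c, without claiming to be exhaustive.
 */
static inline int example_classify_mca_status(u64 status)
{
        if (!MCA_REG__STATUS__VAL(status))
                return -1;              /* bank holds no valid error */
        if (MCA_REG__STATUS__UECC(status) || MCA_REG__STATUS__PCC(status) ||
            MCA_REG__STATUS__UC(status) || MCA_REG__STATUS__TCC(status) ||
            MCA_REG__STATUS__DEFERRED(status))
                return 1;               /* treat as uncorrectable/deferred */
        if (MCA_REG__STATUS__CECC(status))
                return 0;               /* correctable */
        return -1;
}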
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
new file mode 100644
index 000000000000..9c182ce501af
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -0,0 +1,784 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_mes.h"
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "amdgpu_mes_ctx.h"
+
+#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+#define AMDGPU_ONE_DOORBELL_SIZE 8
+
+int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
+{
+ return roundup(AMDGPU_ONE_DOORBELL_SIZE *
+ AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ PAGE_SIZE);
+}
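
/*
 * Illustrative note, not part of this patch: with 8 bytes per doorbell and
 * 1024 queues per process, the slice above is roundup(8 * 1024, PAGE_SIZE) =
 * 8192 bytes -- two pages on a 4 KiB PAGE_SIZE kernel, and a single page on
 * builds with larger page sizes.
 */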
+
+static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
+{
+ int i;
+ struct amdgpu_mes *mes = &adev->mes;
+
+ /* Bitmap for dynamic allocation of kernel doorbells */
+ mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
+ if (!mes->doorbell_bitmap) {
+ dev_err(adev->dev, "Failed to allocate MES doorbell bitmap\n");
+ return -ENOMEM;
+ }
+
+ mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
+ for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
+ adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
+ set_bit(i, mes->doorbell_bitmap);
+ }
+
+ return 0;
+}
+
+static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_mes_log_enable)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
+ return r;
+ }
+
+ memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size);
+
+ return 0;
+}
+
+static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
+{
+ bitmap_free(adev->mes.doorbell_bitmap);
+}
+
+int amdgpu_mes_init(struct amdgpu_device *adev)
+{
+ int i, r, num_pipes;
+
+ adev->mes.adev = adev;
+
+ idr_init(&adev->mes.pasid_idr);
+ idr_init(&adev->mes.gang_id_idr);
+ idr_init(&adev->mes.queue_id_idr);
+ ida_init(&adev->mes.doorbell_ida);
+ spin_lock_init(&adev->mes.queue_id_lock);
+ mutex_init(&adev->mes.mutex_hidden);
+
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++)
+ spin_lock_init(&adev->mes.ring_lock[i]);
+
+ adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+ adev->mes.vmid_mask_mmhub = 0xFF00;
+ adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xFFFE : 0xFF00;
+
+ num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
+ if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
+ dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_GFX_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(12, 0, 0))
+ /*
+ * GFX V12 has only one GFX pipe, but 8 queues in it.
+ * GFX pipe 0 queue 0 is used by the kernel queue.
+ * Set GFX pipe 0 queues 1-7 for MES scheduling:
+ * mask = 1111 1110b
+ */
+ adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE;
+ else
+ /*
+ * GFX pipe 0 queue 0 is used by the kernel queue.
+ * Set GFX pipe 0 queue 1 for MES scheduling:
+ * mask = 10b
+ */
+ adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2;
+ }
+
+ num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec;
+ if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES)
+ dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC;
+ }
+
+ num_pipes = adev->sdma.num_instances;
+ if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES)
+ dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_SDMA_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
+ }
+
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) ring trail_fence_offs wb alloc failed\n",
+ r);
+ goto error;
+ }
+ adev->mes.sch_ctx_gpu_addr[i] =
+ adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4);
+ adev->mes.sch_ctx_ptr[i] =
+ (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]];
+
+ r = amdgpu_device_wb_get(adev,
+ &adev->mes.query_status_fence_offs[i]);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) query_status_fence_offs wb alloc failed\n",
+ r);
+ goto error;
+ }
+ adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr +
+ (adev->mes.query_status_fence_offs[i] * 4);
+ adev->mes.query_status_fence_ptr[i] =
+ (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
+ }
+
+ r = amdgpu_mes_doorbell_init(adev);
+ if (r)
+ goto error;
+
+ r = amdgpu_mes_event_log_init(adev);
+ if (r)
+ goto error_doorbell;
+
+ if (adev->mes.hung_queue_db_array_size) {
+ r = amdgpu_bo_create_kernel(adev,
+ adev->mes.hung_queue_db_array_size * sizeof(u32),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.hung_queue_db_array_gpu_obj,
+ &adev->mes.hung_queue_db_array_gpu_addr,
+ &adev->mes.hung_queue_db_array_cpu_addr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r);
+ goto error_doorbell;
+ }
+ }
+
+ return 0;
+
+error_doorbell:
+ amdgpu_mes_doorbell_free(adev);
+error:
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ if (adev->mes.sch_ctx_ptr[i])
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+ if (adev->mes.query_status_fence_ptr[i])
+ amdgpu_device_wb_free(adev,
+ adev->mes.query_status_fence_offs[i]);
+ }
+
+ idr_destroy(&adev->mes.pasid_idr);
+ idr_destroy(&adev->mes.gang_id_idr);
+ idr_destroy(&adev->mes.queue_id_idr);
+ ida_destroy(&adev->mes.doorbell_ida);
+ mutex_destroy(&adev->mes.mutex_hidden);
+ return r;
+}
+
+void amdgpu_mes_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj,
+ &adev->mes.hung_queue_db_array_gpu_addr,
+ &adev->mes.hung_queue_db_array_cpu_addr);
+
+ amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
+ &adev->mes.event_log_gpu_addr,
+ &adev->mes.event_log_cpu_addr);
+
+ for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
+ if (adev->mes.sch_ctx_ptr[i])
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]);
+ if (adev->mes.query_status_fence_ptr[i])
+ amdgpu_device_wb_free(adev,
+ adev->mes.query_status_fence_offs[i]);
+ }
+
+ amdgpu_mes_doorbell_free(adev);
+
+ idr_destroy(&adev->mes.pasid_idr);
+ idr_destroy(&adev->mes.gang_id_idr);
+ idr_destroy(&adev->mes.queue_id_idr);
+ ida_destroy(&adev->mes.doorbell_ida);
+ mutex_destroy(&adev->mes.mutex_hidden);
+}
+
+int amdgpu_mes_suspend(struct amdgpu_device *adev)
+{
+ struct mes_suspend_gang_input input;
+ int r;
+
+ if (!amdgpu_mes_suspend_resume_all_supported(adev))
+ return 0;
+
+ memset(&input, 0x0, sizeof(struct mes_suspend_gang_input));
+ input.suspend_all_gangs = 1;
+
+ /*
+ * Don't take any other locks while holding the MES lock, to avoid
+ * circular lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to suspend all gangs");
+
+ return r;
+}
+
+int amdgpu_mes_resume(struct amdgpu_device *adev)
+{
+ struct mes_resume_gang_input input;
+ int r;
+
+ if (!amdgpu_mes_suspend_resume_all_supported(adev))
+ return 0;
+
+ memset(&input, 0x0, sizeof(struct mes_resume_gang_input));
+ input.resume_all_gangs = 1;
+
+ /*
+ * Don't take any other locks while holding the MES lock, to avoid
+ * circular lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->resume_gang(&adev->mes, &input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to resume all gangs");
+
+ return r;
+}
+
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ struct mes_map_legacy_queue_input queue_input;
+ int r;
+
+ memset(&queue_input, 0, sizeof(queue_input));
+
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ queue_input.wptr_addr = ring->wptr_gpu_addr;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to map legacy queue\n");
+
+ return r;
+}
+
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct mes_unmap_legacy_queue_input queue_input;
+ int r;
+
+ queue_input.action = action;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.trail_fence_addr = gpu_addr;
+ queue_input.trail_fence_data = seq;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to unmap legacy queue\n");
+
+ return r;
+}
+
+int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int vmid,
+ bool use_mmio)
+{
+ struct mes_reset_queue_input queue_input;
+ int r;
+
+ memset(&queue_input, 0, sizeof(queue_input));
+
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.me_id = ring->me;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0;
+ queue_input.wptr_addr = ring->wptr_gpu_addr;
+ queue_input.vmid = vmid;
+ queue_input.use_mmio = use_mmio;
+ queue_input.is_kq = true;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ queue_input.legacy_gfx = true;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to reset legacy queue\n");
+
+ return r;
+}
+
+int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev)
+{
+ return adev->mes.hung_queue_db_array_size;
+}
+
+int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
+ int queue_type,
+ bool detect_only,
+ unsigned int *hung_db_num,
+ u32 *hung_db_array)
+{
+ struct mes_detect_and_reset_queue_input input;
+ u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr;
+ int r, i;
+
+ if (!hung_db_num || !hung_db_array)
+ return -EINVAL;
+
+ if ((queue_type != AMDGPU_RING_TYPE_GFX) &&
+ (queue_type != AMDGPU_RING_TYPE_COMPUTE) &&
+ (queue_type != AMDGPU_RING_TYPE_SDMA))
+ return -EINVAL;
+
+ /* Clear the doorbell array before detection */
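+ /* memset() writes the 0xff byte pattern, i.e. AMDGPU_MES_INVALID_DB_OFFSET in every u32 slot */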
+ memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET,
+ adev->mes.hung_queue_db_array_size * sizeof(u32));
+ input.queue_type = queue_type;
+ input.detect_only = detect_only;
+
+ r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes,
+ &input);
+ if (r) {
+ dev_err(adev->dev, "failed to detect and reset\n");
+ } else {
+ *hung_db_num = 0;
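+ /* the first hung_queue_hqd_info_offset entries carry doorbell offsets; later entries hold HQD info (see TODO below) */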
+ for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) {
+ if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) {
+ hung_db_array[i] = db_array[i];
+ *hung_db_num += 1;
+ }
+ }
+
+ /*
+ * TODO: return HQD info for MES scheduled user compute queue reset cases
+ * stored in hung_db_array hqd info offset to full array size
+ */
+ }
+
+ return r;
+}
+
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+ uint32_t val = 0;
+ uint32_t addr_offset = 0;
+ uint64_t read_val_gpu_addr;
+ uint32_t *read_val_ptr;
+
+ if (amdgpu_device_wb_get(adev, &addr_offset)) {
+ dev_err(adev->dev, "critical bug! too many mes readers\n");
+ goto error;
+ }
+ read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
+ read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
+ op_input.op = MES_MISC_OP_READ_REG;
+ op_input.read_reg.reg_offset = reg;
+ op_input.read_reg.buffer_addr = read_val_gpu_addr;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes rreg is not supported!\n");
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to read reg (0x%x)\n", reg);
+ else
+ val = *(read_val_ptr);
+
+error:
+ if (addr_offset)
+ amdgpu_device_wb_free(adev, addr_offset);
+ return val;
+}
+
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRITE_REG;
+ op_input.write_reg.reg_offset = reg;
+ op_input.write_reg.reg_value = val;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes wreg is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to write reg (0x%x)\n", reg);
+
+error:
+ return r;
+}
+
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
+ op_input.wrm_reg.reg0 = reg0;
+ op_input.wrm_reg.reg1 = reg1;
+ op_input.wrm_reg.ref = ref;
+ op_input.wrm_reg.mask = mask;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to reg_write_reg_wait\n");
+
+error:
+ return r;
+}
+
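+/* Flush HDP through MES: write the NBIO HDP flush request register and wait for the matching bits in the flush-done register. */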
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev)
+{
+ uint32_t hdp_flush_req_offset, hdp_flush_done_offset, ref_and_mask;
+
+ hdp_flush_req_offset = adev->nbio.funcs->get_hdp_flush_req_offset(adev);
+ hdp_flush_done_offset = adev->nbio.funcs->get_hdp_flush_done_offset(adev);
+ ref_and_mask = adev->nbio.hdp_flush_reg->ref_and_mask_cp0;
+
+ return amdgpu_mes_reg_write_reg_wait(adev, hdp_flush_req_offset, hdp_flush_done_offset,
+ ref_and_mask, ref_and_mask);
+}
+
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev,
+ "mes set shader debugger is not supported!\n");
+ return -EINVAL;
+ }
+
+ op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+ op_input.set_shader_debugger.process_context_addr = process_context_addr;
+ op_input.set_shader_debugger.flags.u32all = flags;
+
+ /* use amdgpu_mes_flush_shader_debugger() instead */
+ if (op_input.set_shader_debugger.flags.process_ctx_flush)
+ return -EINVAL;
+
+ op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
+ memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
+ sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
+
+ if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT) >= 14)
+ op_input.set_shader_debugger.trap_en = trap_en;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ dev_err(adev->dev, "failed to set_shader_debugger\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+
+ return r;
+}
+
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev,
+ "mes flush shader debugger is not supported!\n");
+ return -EINVAL;
+ }
+
+ op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+ op_input.set_shader_debugger.process_context_addr = process_context_addr;
+ op_input.set_shader_debugger.flags.process_ctx_flush = true;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ dev_err(adev->dev, "failed to set_shader_debugger\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+
+ return r;
+}
+
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio)
+{
+ return adev->mes.aggregated_doorbells[prio];
+}
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
+{
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ struct amdgpu_firmware_info *info;
+ char ucode_prefix[30];
+ char fw_name[50];
+ bool need_retry = false;
+ u32 *ucode_ptr;
+ int r;
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
+ sizeof(ucode_prefix));
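+ /*
+ * Firmware naming: unified MES uses <prefix>_uni_mes.bin; GC 11.x uses
+ * <prefix>_mes_2.bin for the scheduler pipe and <prefix>_mes1.bin for the
+ * KIQ pipe, with a fallback to <prefix>_mes.bin; otherwise the scheduler
+ * pipe uses <prefix>_mes.bin and the KIQ pipe <prefix>_mes1.bin.
+ */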
+ if (adev->enable_uni_mes) {
+ snprintf(fw_name, sizeof(fw_name),
+ "amdgpu/%s_uni_mes.bin", ucode_prefix);
+ } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+ ucode_prefix,
+ pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
+ need_retry = true;
+ } else {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
+ ucode_prefix,
+ pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
+ }
+
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
+ if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
+ dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix);
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mes.bin", ucode_prefix);
+ }
+
+ if (r)
+ goto out;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+ adev->mes.uc_start_addr[pipe] =
+ le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
+ adev->mes.data_start_addr[pipe] =
+ le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
+ ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data +
+ sizeof(union amdgpu_firmware_header));
+ adev->mes.fw_version[pipe] =
+ le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK;
+
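+ /* with PSP-based loading, register the MES instruction and data images in the ucode list and account for their sizes */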
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ int ucode, ucode_data;
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE) {
+ ucode = AMDGPU_UCODE_ID_CP_MES;
+ ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
+ } else {
+ ucode = AMDGPU_UCODE_ID_CP_MES1;
+ ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
+ }
+
+ info = &adev->firmware.ucode[ucode];
+ info->ucode_id = ucode;
+ info->fw = adev->mes.fw[pipe];
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
+ PAGE_SIZE);
+
+ info = &adev->firmware.ucode[ucode_data];
+ info->ucode_id = ucode_data;
+ info->fw = adev->mes.fw[pipe];
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
+ PAGE_SIZE);
+ }
+
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->mes.fw[pipe]);
+ return r;
+}
+
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
+{
+ uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+
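+ /* supported on GC 11.x with MES firmware rev >= 0x63, and unconditionally from GC 12.0 onward */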
+ return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) &&
+ mes_rev >= 0x63) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0));
+}
+
+/* FIXME: node_id will be used to identify the correct MES instance in the future */
+static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
+ uint32_t node_id, bool enable)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ op_input.op = MES_MISC_OP_CHANGE_CONFIG;
+ op_input.change_config.option.limit_single_process = enable ? 1 : 0;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes change config is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to change_config.\n");
+
+error:
+ return r;
+}
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ r |= amdgpu_mes_set_enforce_isolation(adev, i, true);
+ else
+ r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ }
+ return r;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+ uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
+
+ seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
+ mem, adev->mes.event_log_size, false);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
+
+#endif
+
+void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ if (adev->enable_mes && amdgpu_mes_log_enable)
+ debugfs_create_file("amdgpu_mes_event_log", 0444, root,
+ adev, &amdgpu_debugfs_mes_event_log_fops);
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
new file mode 100644
index 000000000000..e989225b354b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -0,0 +1,509 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MES_H__
+#define __AMDGPU_MES_H__
+
+#include "amdgpu_irq.h"
+#include "kgd_kfd_interface.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_doorbell.h"
+#include <linux/sched/mm.h>
+
+#define AMDGPU_MES_MAX_COMPUTE_PIPES 8
+#define AMDGPU_MES_MAX_GFX_PIPES 2
+#define AMDGPU_MES_MAX_SDMA_PIPES 2
+
+#define AMDGPU_MES_API_VERSION_SHIFT 12
+#define AMDGPU_MES_FEAT_VERSION_SHIFT 24
+
+#define AMDGPU_MES_VERSION_MASK 0x00000fff
+#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
+#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
+#define AMDGPU_MES_MSCRATCH_SIZE 0x40000
+#define AMDGPU_MES_INVALID_DB_OFFSET 0xffffffff
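+/* e.g. API version = (sched_version & AMDGPU_MES_API_VERSION_MASK) >> AMDGPU_MES_API_VERSION_SHIFT */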
+
+enum amdgpu_mes_priority_level {
+ AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
+ AMDGPU_MES_PRIORITY_LEVEL_MEDIUM = 2,
+ AMDGPU_MES_PRIORITY_LEVEL_HIGH = 3,
+ AMDGPU_MES_PRIORITY_LEVEL_REALTIME = 4,
+ AMDGPU_MES_PRIORITY_NUM_LEVELS
+};
+
+#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+
+struct amdgpu_mes_funcs;
+
+enum amdgpu_mes_pipe {
+ AMDGPU_MES_SCHED_PIPE = 0,
+ AMDGPU_MES_KIQ_PIPE,
+ AMDGPU_MAX_MES_PIPES = 2,
+};
+
+struct amdgpu_mes {
+ struct amdgpu_device *adev;
+
+ struct mutex mutex_hidden;
+
+ struct idr pasid_idr;
+ struct idr gang_id_idr;
+ struct idr queue_id_idr;
+ struct ida doorbell_ida;
+
+ spinlock_t queue_id_lock;
+
+ uint32_t sched_version;
+ uint32_t kiq_version;
+ uint32_t fw_version[AMDGPU_MAX_MES_PIPES];
+ bool enable_legacy_queue_map;
+
+ uint32_t total_max_queue;
+ uint32_t max_doorbell_slices;
+
+ uint64_t default_process_quantum;
+ uint64_t default_gang_quantum;
+
+ struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES];
+ spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES];
+
+ const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
+
+ /* mes ucode */
+ struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES];
+
+ /* mes ucode data */
+ struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
+ uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES];
+
+ /* eop gpu obj */
+ struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
+ uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES];
+
+ void *mqd_backup[AMDGPU_MAX_MES_PIPES];
+ struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES];
+
+ uint32_t vmid_mask_gfxhub;
+ uint32_t vmid_mask_mmhub;
+ uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
+ uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
+ uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
+ uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
+ uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
+ uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
+ uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
+
+ uint32_t saved_flags;
+
+ /* initialize kiq pipe */
+ int (*kiq_hw_init)(struct amdgpu_device *adev);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev);
+
+ /* MES doorbells */
+ uint32_t db_start_dw_offset;
+ uint32_t num_mes_dbs;
+ unsigned long *doorbell_bitmap;
+
+ /* MES event log buffer */
+ uint32_t event_log_size;
+ struct amdgpu_bo *event_log_gpu_obj;
+ uint64_t event_log_gpu_addr;
+ void *event_log_cpu_addr;
+
+ /* ip specific functions */
+ const struct amdgpu_mes_funcs *funcs;
+
+ /* mes resource_1 bo */
+ struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES];
+ uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ void *resource_1_addr[AMDGPU_MAX_MES_PIPES];
+
+ int hung_queue_db_array_size;
+ int hung_queue_hqd_info_offset;
+ struct amdgpu_bo *hung_queue_db_array_gpu_obj;
+ uint64_t hung_queue_db_array_gpu_addr;
+ void *hung_queue_db_array_cpu_addr;
+};
+
+struct amdgpu_mes_gang {
+ int gang_id;
+ int priority;
+ int inprocess_gang_priority;
+ int global_priority_level;
+ struct list_head list;
+ struct amdgpu_mes_process *process;
+ struct amdgpu_bo *gang_ctx_bo;
+ uint64_t gang_ctx_gpu_addr;
+ void *gang_ctx_cpu_ptr;
+ uint64_t gang_quantum;
+ struct list_head queue_list;
+};
+
+struct amdgpu_mes_queue {
+ struct list_head list;
+ struct amdgpu_mes_gang *gang;
+ int queue_id;
+ uint64_t doorbell_off;
+ struct amdgpu_bo *mqd_obj;
+ void *mqd_cpu_ptr;
+ uint64_t mqd_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ int queue_type;
+ int paging;
+ struct amdgpu_ring *ring;
+};
+
+struct amdgpu_mes_queue_properties {
+ int queue_type;
+ uint64_t hqd_base_gpu_addr;
+ uint64_t rptr_gpu_addr;
+ uint64_t wptr_gpu_addr;
+ uint64_t wptr_mc_addr;
+ uint32_t queue_size;
+ uint64_t eop_gpu_addr;
+ uint32_t hqd_pipe_priority;
+ uint32_t hqd_queue_priority;
+ bool paging;
+ struct amdgpu_ring *ring;
+ /* out */
+ uint64_t doorbell_off;
+};
+
+struct amdgpu_mes_gang_properties {
+ uint32_t priority;
+ uint32_t gang_quantum;
+ uint32_t inprocess_gang_priority;
+ uint32_t priority_level;
+ int global_priority_level;
+};
+
+struct mes_add_queue_input {
+ uint32_t process_id;
+ uint64_t page_table_base_addr;
+ uint64_t process_va_start;
+ uint64_t process_va_end;
+ uint64_t process_quantum;
+ uint64_t process_context_addr;
+ uint64_t gang_quantum;
+ uint64_t gang_context_addr;
+ uint32_t inprocess_gang_priority;
+ uint32_t gang_global_priority_level;
+ uint32_t doorbell_offset;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+ uint64_t wptr_mc_addr;
+ uint32_t queue_type;
+ uint32_t paging;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+ uint32_t trap_en;
+ uint32_t skip_process_ctx_clear;
+ uint32_t is_kfd_process;
+ uint32_t is_aql_queue;
+ uint32_t queue_size;
+ uint32_t exclusively_scheduled;
+};
+
+struct mes_remove_queue_input {
+ uint32_t doorbell_offset;
+ uint64_t gang_context_addr;
+ bool remove_queue_after_reset;
+};
+
+struct mes_map_legacy_queue_input {
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+};
+
+struct mes_unmap_legacy_queue_input {
+ enum amdgpu_unmap_queues_action action;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t trail_fence_addr;
+ uint64_t trail_fence_data;
+};
+
+struct mes_suspend_gang_input {
+ bool suspend_all_gangs;
+ uint64_t gang_context_addr;
+ uint64_t suspend_fence_addr;
+ uint32_t suspend_fence_value;
+};
+
+struct mes_resume_gang_input {
+ bool resume_all_gangs;
+ uint64_t gang_context_addr;
+};
+
+struct mes_reset_queue_input {
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ bool use_mmio;
+ uint32_t me_id;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+ uint32_t vmid;
+ bool legacy_gfx;
+ bool is_kq;
+};
+
+struct mes_detect_and_reset_queue_input {
+ uint32_t queue_type;
+ bool detect_only;
+};
+
+struct mes_inv_tlbs_pasid_input {
+ uint32_t xcc_id;
+ uint16_t pasid;
+ uint8_t hub_id;
+ uint8_t flush_type;
+};
+
+enum mes_misc_opcode {
+ MES_MISC_OP_WRITE_REG,
+ MES_MISC_OP_READ_REG,
+ MES_MISC_OP_WRM_REG_WAIT,
+ MES_MISC_OP_WRM_REG_WR_WAIT,
+ MES_MISC_OP_SET_SHADER_DEBUGGER,
+ MES_MISC_OP_CHANGE_CONFIG,
+};
+
+struct mes_misc_op_input {
+ enum mes_misc_opcode op;
+
+ union {
+ struct {
+ uint32_t reg_offset;
+ uint64_t buffer_addr;
+ } read_reg;
+
+ struct {
+ uint32_t reg_offset;
+ uint32_t reg_value;
+ } write_reg;
+
+ struct {
+ uint32_t ref;
+ uint32_t mask;
+ uint32_t reg0;
+ uint32_t reg1;
+ } wrm_reg;
+
+ struct {
+ uint64_t process_context_addr;
+ union {
+ struct {
+ uint32_t single_memop : 1;
+ uint32_t single_alu_op : 1;
+ uint32_t reserved: 29;
+ uint32_t process_ctx_flush: 1;
+ };
+ uint32_t u32all;
+ } flags;
+ uint32_t spi_gdbg_per_vmid_cntl;
+ uint32_t tcp_watch_cntl[4];
+ uint32_t trap_en;
+ } set_shader_debugger;
+
+ struct {
+ union {
+ struct {
+ uint32_t limit_single_process : 1;
+ uint32_t enable_hws_logging_buffer : 1;
+ uint32_t reserved : 30;
+ };
+ uint32_t all;
+ } option;
+ struct {
+ uint32_t tdr_level;
+ uint32_t tdr_delay;
+ } tdr_config;
+ } change_config;
+ };
+};
+
+struct amdgpu_mes_funcs {
+ int (*add_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_add_queue_input *input);
+
+ int (*remove_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_remove_queue_input *input);
+
+ int (*map_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_map_legacy_queue_input *input);
+
+ int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input);
+
+ int (*suspend_gang)(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input);
+
+ int (*resume_gang)(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input);
+
+ int (*misc_op)(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input);
+
+ int (*reset_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input);
+
+ int (*detect_and_reset_hung_queues)(struct amdgpu_mes *mes,
+ struct mes_detect_and_reset_queue_input *input);
+
+ int (*invalidate_tlbs_pasid)(struct amdgpu_mes *mes,
+ struct mes_inv_tlbs_pasid_input *input);
+};
+
+#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
+
+int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
+int amdgpu_mes_init(struct amdgpu_device *adev);
+void amdgpu_mes_fini(struct amdgpu_device *adev);
+
+int amdgpu_mes_suspend(struct amdgpu_device *adev);
+int amdgpu_mes_resume(struct amdgpu_device *adev);
+
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int vmid,
+ bool use_mmio);
+
+int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev);
+int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev,
+ int queue_type,
+ bool detect_only,
+ unsigned int *hung_db_num,
+ u32 *hung_db_array);
+
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val);
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
+int amdgpu_mes_hdp_flush(struct amdgpu_device *adev);
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr,
+ uint32_t spi_gdbg_per_vmid_cntl,
+ const uint32_t *tcp_watch_cntl,
+ uint32_t flags,
+ bool trap_en);
+int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
+ uint64_t process_context_addr);
+
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio);
+
+int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail on why reclaim must be disabled while holding the MES lock:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is, that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_noreclaim_save/restore calls guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_nofs_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems; we
+ * have no control over them or their lock dependencies. So the only workable
+ * solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+ mutex_lock(&mes->mutex_hidden);
+ mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+ memalloc_noreclaim_restore(mes->saved_flags);
+ mutex_unlock(&mes->mutex_hidden);
+}
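+
+/*
+ * Typical caller pattern, as used throughout amdgpu_mes.c:
+ *
+ *	amdgpu_mes_lock(&adev->mes);
+ *	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ *	amdgpu_mes_unlock(&adev->mes);
+ *
+ * Nothing that may allocate memory with reclaim enabled, and no other
+ * locks (e.g. reservation locks), may be used between lock and unlock.
+ */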
+
+bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
+
+#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
new file mode 100644
index 000000000000..912a5be2ece6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MES_CTX_H__
+#define __AMDGPU_MES_CTX_H__
+
+#include "v10_structs.h"
+
+enum {
+ AMDGPU_MES_CTX_RPTR_OFFS = 0,
+ AMDGPU_MES_CTX_WPTR_OFFS,
+ AMDGPU_MES_CTX_FENCE_OFFS,
+ AMDGPU_MES_CTX_COND_EXE_OFFS,
+ AMDGPU_MES_CTX_TRAIL_FENCE_OFFS,
+ AMDGPU_MES_CTX_MAX_OFFS,
+};
+
+enum {
+ AMDGPU_MES_CTX_RING_OFFS = AMDGPU_MES_CTX_MAX_OFFS,
+ AMDGPU_MES_CTX_IB_OFFS,
+ AMDGPU_MES_CTX_PADDING_OFFS,
+};
+
+#define AMDGPU_MES_CTX_MAX_GFX_RINGS 1
+#define AMDGPU_MES_CTX_MAX_COMPUTE_RINGS 4
+#define AMDGPU_MES_CTX_MAX_SDMA_RINGS 2
+#define AMDGPU_MES_CTX_MAX_RINGS \
+ (AMDGPU_MES_CTX_MAX_GFX_RINGS + \
+ AMDGPU_MES_CTX_MAX_COMPUTE_RINGS + \
+ AMDGPU_MES_CTX_MAX_SDMA_RINGS)
+
+#define AMDGPU_CSA_SDMA_SIZE 64
+#define GFX10_MEC_HPD_SIZE 2048
+
+struct amdgpu_wb_slot {
+ uint32_t data[8];
+};
+
+struct amdgpu_mes_ctx_meta_data {
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ /* gfx csa */
+ struct v10_gfx_meta_data gfx_meta_data;
+
+ uint8_t gds_backup[64 * 1024];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) gfx[AMDGPU_MES_CTX_MAX_GFX_RINGS];
+
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ uint8_t mec_hpd[GFX10_MEC_HPD_SIZE];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) compute[AMDGPU_MES_CTX_MAX_COMPUTE_RINGS];
+
+ struct {
+ uint8_t ring[PAGE_SIZE * 4];
+
+ /* sdma csa for mcbp */
+ uint8_t sdma_meta_data[AMDGPU_CSA_SDMA_SIZE];
+
+ struct amdgpu_wb_slot slots[AMDGPU_MES_CTX_MAX_OFFS];
+
+ /* only for ib test */
+ uint32_t ib[256] __aligned(256);
+
+ uint32_t padding[64];
+
+ } __aligned(PAGE_SIZE) sdma[AMDGPU_MES_CTX_MAX_SDMA_RINGS];
+};
+
+struct amdgpu_mes_ctx_data {
+ struct amdgpu_bo *meta_data_obj;
+ uint64_t meta_data_gpu_addr;
+ uint64_t meta_data_mc_addr;
+ struct amdgpu_bo_va *meta_data_va;
+ void *meta_data_ptr;
+ uint32_t gang_ids[AMDGPU_HW_IP_DMA+1];
+};
+
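+/* bit 24 marks a fence sequence number as coming from a MES queue; the ID mask covers the 24 bits below it */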
+#define AMDGPU_FENCE_MES_QUEUE_FLAG 0x1000000u
+#define AMDGPU_FENCE_MES_QUEUE_ID_MASK (AMDGPU_FENCE_MES_QUEUE_FLAG - 1)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
new file mode 100644
index 000000000000..0f6b1021fef3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_mmhub_ras *ras;
+
+ if (!adev->mmhub.ras)
+ return 0;
+
+ ras = adev->mmhub.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register mmhub ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "mmhub");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->mmhub.ras_if = &ras->ras_block.ras_comm;
+
+ /* mmhub ras follows amdgpu_ras_block_late_init_default for late init */
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
new file mode 100644
index 000000000000..1ca9d4ed8063
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_MMHUB_H__
+#define __AMDGPU_MMHUB_H__
+
+enum amdgpu_mmhub_ras_memory_id {
+ AMDGPU_MMHUB_WGMI_PAGEMEM = 0,
+ AMDGPU_MMHUB_RGMI_PAGEMEM = 1,
+ AMDGPU_MMHUB_WDRAM_PAGEMEM = 2,
+ AMDGPU_MMHUB_RDRAM_PAGEMEM = 3,
+ AMDGPU_MMHUB_WIO_CMDMEM = 4,
+ AMDGPU_MMHUB_RIO_CMDMEM = 5,
+ AMDGPU_MMHUB_WGMI_CMDMEM = 6,
+ AMDGPU_MMHUB_RGMI_CMDMEM = 7,
+ AMDGPU_MMHUB_WDRAM_CMDMEM = 8,
+ AMDGPU_MMHUB_RDRAM_CMDMEM = 9,
+ AMDGPU_MMHUB_MAM_DMEM0 = 10,
+ AMDGPU_MMHUB_MAM_DMEM1 = 11,
+ AMDGPU_MMHUB_MAM_DMEM2 = 12,
+ AMDGPU_MMHUB_MAM_DMEM3 = 13,
+ AMDGPU_MMHUB_WRET_TAGMEM = 19,
+ AMDGPU_MMHUB_RRET_TAGMEM = 20,
+ AMDGPU_MMHUB_WIO_DATAMEM = 21,
+ AMDGPU_MMHUB_WGMI_DATAMEM = 22,
+ AMDGPU_MMHUB_WDRAM_DATAMEM = 23,
+ AMDGPU_MMHUB_MEMORY_BLOCK_LAST,
+};
+
+struct amdgpu_mmhub_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_mmhub_funcs {
+ u64 (*get_fb_location)(struct amdgpu_device *adev);
+ u64 (*get_mc_fb_offset)(struct amdgpu_device *adev);
+ void (*init)(struct amdgpu_device *adev);
+ int (*gart_enable)(struct amdgpu_device *adev);
+ void (*set_fault_enable_default)(struct amdgpu_device *adev,
+ bool value);
+ void (*gart_disable)(struct amdgpu_device *adev);
+ int (*set_clockgating)(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+ void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags);
+ void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid,
+ uint64_t page_table_base);
+ void (*update_power_gating)(struct amdgpu_device *adev,
+ bool enable);
+};
+
+struct amdgpu_mmhub {
+ struct ras_common_if *ras_if;
+ const struct amdgpu_mmhub_funcs *funcs;
+ struct amdgpu_mmhub_ras *ras;
+};
+
+int amdgpu_mmhub_ras_sw_init(struct amdgpu_device *adev);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
new file mode 100644
index 000000000000..dc8d2f52c7d6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -0,0 +1,713 @@
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ * VA Linux Systems Inc., Fremont, California.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original Authors:
+ * Kevin E. Martin, Rickard E. Faith, Alan Hourihane
+ *
+ * Kernel port Author: Dave Airlie
+ */
+
+#ifndef AMDGPU_MODE_H
+#define AMDGPU_MODE_H
+
+#include <drm/display/drm_dp_helper.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_fixed.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_probe_helper.h>
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+#include <linux/hrtimer.h>
+#include "amdgpu_irq.h"
+
+#include <drm/display/drm_dp_mst_helper.h>
+#include "modules/inc/mod_freesync.h"
+#include "amdgpu_dm_irq_params.h"
+
+struct amdgpu_bo;
+struct amdgpu_device;
+struct amdgpu_encoder;
+struct amdgpu_router;
+struct amdgpu_hpd;
+struct edid;
+struct drm_edid;
+
+#define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base)
+#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
+#define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base)
+#define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base)
+
+#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base)
+
+#define AMDGPU_MAX_HPD_PINS 6
+#define AMDGPU_MAX_CRTCS 6
+#define AMDGPU_MAX_PLANES 6
+#define AMDGPU_MAX_AFMT_BLOCKS 9
+
+enum amdgpu_rmx_type {
+ RMX_OFF,
+ RMX_FULL,
+ RMX_CENTER,
+ RMX_ASPECT
+};
+
+enum amdgpu_underscan_type {
+ UNDERSCAN_OFF,
+ UNDERSCAN_ON,
+ UNDERSCAN_AUTO,
+};
+
+#define AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS 50
+#define AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS 10
+
+enum amdgpu_hpd_id {
+ AMDGPU_HPD_1 = 0,
+ AMDGPU_HPD_2,
+ AMDGPU_HPD_3,
+ AMDGPU_HPD_4,
+ AMDGPU_HPD_5,
+ AMDGPU_HPD_6,
+ AMDGPU_HPD_NONE = 0xff,
+};
+
+enum amdgpu_crtc_irq {
+ AMDGPU_CRTC_IRQ_VBLANK1 = 0,
+ AMDGPU_CRTC_IRQ_VBLANK2,
+ AMDGPU_CRTC_IRQ_VBLANK3,
+ AMDGPU_CRTC_IRQ_VBLANK4,
+ AMDGPU_CRTC_IRQ_VBLANK5,
+ AMDGPU_CRTC_IRQ_VBLANK6,
+ AMDGPU_CRTC_IRQ_VLINE1,
+ AMDGPU_CRTC_IRQ_VLINE2,
+ AMDGPU_CRTC_IRQ_VLINE3,
+ AMDGPU_CRTC_IRQ_VLINE4,
+ AMDGPU_CRTC_IRQ_VLINE5,
+ AMDGPU_CRTC_IRQ_VLINE6,
+ AMDGPU_CRTC_IRQ_NONE = 0xff
+};
+
+enum amdgpu_pageflip_irq {
+ AMDGPU_PAGEFLIP_IRQ_D1 = 0,
+ AMDGPU_PAGEFLIP_IRQ_D2,
+ AMDGPU_PAGEFLIP_IRQ_D3,
+ AMDGPU_PAGEFLIP_IRQ_D4,
+ AMDGPU_PAGEFLIP_IRQ_D5,
+ AMDGPU_PAGEFLIP_IRQ_D6,
+ AMDGPU_PAGEFLIP_IRQ_NONE = 0xff
+};
+
+enum amdgpu_flip_status {
+ AMDGPU_FLIP_NONE,
+ AMDGPU_FLIP_PENDING,
+ AMDGPU_FLIP_SUBMITTED
+};
+
+#define AMDGPU_MAX_I2C_BUS 16
+
+/* amdgpu gpio-based i2c
+ * 1. "mask" reg and bits
+ * grabs the gpio pins for software use
+ * 0=not held 1=held
+ * 2. "a" reg and bits
+ * output pin value
+ * 0=low 1=high
+ * 3. "en" reg and bits
+ * sets the pin direction
+ * 0=input 1=output
+ * 4. "y" reg and bits
+ * input pin value
+ * 0=low 1=high
+ */
+struct amdgpu_i2c_bus_rec {
+ bool valid;
+ /* id used by atom */
+ uint8_t i2c_id;
+ /* id used by atom */
+ enum amdgpu_hpd_id hpd;
+ /* can be used with hw i2c engine */
+ bool hw_capable;
+ /* uses multi-media i2c engine */
+ bool mm_i2c;
+ /* regs and bits */
+ uint32_t mask_clk_reg;
+ uint32_t mask_data_reg;
+ uint32_t a_clk_reg;
+ uint32_t a_data_reg;
+ uint32_t en_clk_reg;
+ uint32_t en_data_reg;
+ uint32_t y_clk_reg;
+ uint32_t y_data_reg;
+ uint32_t mask_clk_mask;
+ uint32_t mask_data_mask;
+ uint32_t a_clk_mask;
+ uint32_t a_data_mask;
+ uint32_t en_clk_mask;
+ uint32_t en_data_mask;
+ uint32_t y_clk_mask;
+ uint32_t y_data_mask;
+};
+
+#define AMDGPU_MAX_BIOS_CONNECTOR 16
+
+/* pll flags */
+#define AMDGPU_PLL_USE_BIOS_DIVS (1 << 0)
+#define AMDGPU_PLL_NO_ODD_POST_DIV (1 << 1)
+#define AMDGPU_PLL_USE_REF_DIV (1 << 2)
+#define AMDGPU_PLL_LEGACY (1 << 3)
+#define AMDGPU_PLL_PREFER_LOW_REF_DIV (1 << 4)
+#define AMDGPU_PLL_PREFER_HIGH_REF_DIV (1 << 5)
+#define AMDGPU_PLL_PREFER_LOW_FB_DIV (1 << 6)
+#define AMDGPU_PLL_PREFER_HIGH_FB_DIV (1 << 7)
+#define AMDGPU_PLL_PREFER_LOW_POST_DIV (1 << 8)
+#define AMDGPU_PLL_PREFER_HIGH_POST_DIV (1 << 9)
+#define AMDGPU_PLL_USE_FRAC_FB_DIV (1 << 10)
+#define AMDGPU_PLL_PREFER_CLOSEST_LOWER (1 << 11)
+#define AMDGPU_PLL_USE_POST_DIV (1 << 12)
+#define AMDGPU_PLL_IS_LCD (1 << 13)
+#define AMDGPU_PLL_PREFER_MINM_OVER_MAXP (1 << 14)
+
+struct amdgpu_pll {
+ /* reference frequency */
+ uint32_t reference_freq;
+
+ /* fixed dividers */
+ uint32_t reference_div;
+ uint32_t post_div;
+
+ /* pll in/out limits */
+ uint32_t pll_in_min;
+ uint32_t pll_in_max;
+ uint32_t pll_out_min;
+ uint32_t pll_out_max;
+ uint32_t lcd_pll_out_min;
+ uint32_t lcd_pll_out_max;
+ uint32_t best_vco;
+
+ /* divider limits */
+ uint32_t min_ref_div;
+ uint32_t max_ref_div;
+ uint32_t min_post_div;
+ uint32_t max_post_div;
+ uint32_t min_feedback_div;
+ uint32_t max_feedback_div;
+ uint32_t min_frac_feedback_div;
+ uint32_t max_frac_feedback_div;
+
+ /* flags for the current clock */
+ uint32_t flags;
+
+ /* pll id */
+ uint32_t id;
+};
+
+struct amdgpu_i2c_chan {
+ struct i2c_adapter adapter;
+ struct drm_device *dev;
+ struct i2c_algo_bit_data bit;
+ struct amdgpu_i2c_bus_rec rec;
+ struct drm_dp_aux aux;
+ bool has_aux;
+ struct mutex mutex;
+};
+
+struct amdgpu_afmt {
+ bool enabled;
+ int offset;
+ bool last_buffer_filled_status;
+ int id;
+ struct amdgpu_audio_pin *pin;
+};
+
+/*
+ * Audio
+ */
+struct amdgpu_audio_pin {
+ int channels;
+ int rate;
+ int bits_per_sample;
+ u8 status_bits;
+ u8 category_code;
+ u32 offset;
+ bool connected;
+ u32 id;
+};
+
+struct amdgpu_audio {
+ bool enabled;
+ struct amdgpu_audio_pin pin[AMDGPU_MAX_AFMT_BLOCKS];
+ int num_pins;
+};
+
+struct amdgpu_display_funcs {
+ /* display watermarks */
+ void (*bandwidth_update)(struct amdgpu_device *adev);
+ /* get frame count */
+ u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc);
+ /* set backlight level */
+ void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder,
+ u8 level);
+ /* get backlight level */
+ u8 (*backlight_get_level)(struct amdgpu_encoder *amdgpu_encoder);
+ /* hotplug detect */
+ bool (*hpd_sense)(struct amdgpu_device *adev, enum amdgpu_hpd_id hpd);
+ void (*hpd_set_polarity)(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd);
+ u32 (*hpd_get_gpio_reg)(struct amdgpu_device *adev);
+ /* pageflipping */
+ void (*page_flip)(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async);
+ int (*page_flip_get_scanoutpos)(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position);
+ /* display topology setup */
+ void (*add_encoder)(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps);
+ void (*add_connector)(struct amdgpu_device *adev,
+ uint32_t connector_id,
+ uint32_t supported_device,
+ int connector_type,
+ struct amdgpu_i2c_bus_rec *i2c_bus,
+ uint16_t connector_object_id,
+ struct amdgpu_hpd *hpd,
+ struct amdgpu_router *router);
+};
+
+struct amdgpu_framebuffer {
+ struct drm_framebuffer base;
+
+ uint64_t tiling_flags;
+ bool tmz_surface;
+ bool gfx12_dcc;
+
+ /* caching for later use */
+ uint64_t address;
+};
+
+struct amdgpu_mode_info {
+ struct atom_context *atom_context;
+ struct card_info *atom_card_info;
+ bool mode_config_initialized;
+ struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
+ struct drm_plane *planes[AMDGPU_MAX_PLANES];
+ struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
+ /* DVI-I properties */
+ struct drm_property *coherent_mode_property;
+ /* DAC enable load detect */
+ struct drm_property *load_detect_property;
+ /* underscan */
+ struct drm_property *underscan_property;
+ struct drm_property *underscan_hborder_property;
+ struct drm_property *underscan_vborder_property;
+ /* audio */
+ struct drm_property *audio_property;
+ /* FMT dithering */
+ struct drm_property *dither_property;
+ /* Adaptive Backlight Modulation (power feature) */
+ struct drm_property *abm_level_property;
+ /* hardcoded DFP edid from BIOS */
+ const struct drm_edid *bios_hardcoded_edid;
+
+ /* firmware flags */
+ u32 firmware_flags;
+ /* pointer to backlight encoder */
+ struct amdgpu_encoder *bl_encoder;
+ u8 bl_level; /* saved backlight level */
+ struct amdgpu_audio audio; /* audio stuff */
+ int num_crtc; /* number of crtcs */
+ int num_hpd; /* number of hpd pins */
+ int num_dig; /* number of dig blocks */
+ bool gpu_vm_support; /* supports display from GTT */
+ int disp_priority;
+ const struct amdgpu_display_funcs *funcs;
+ const enum drm_plane_type *plane_type;
+
+ /* Driver-private color mgmt props */
+
+ /* @plane_degamma_lut_property: Plane property to set a degamma LUT to
+ * convert encoded values to light linear values before sampling or
+ * blending.
+ */
+ struct drm_property *plane_degamma_lut_property;
+ /* @plane_degamma_lut_size_property: Plane property to define the max
+ * size of degamma LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_degamma_lut_size_property;
+ /**
+ * @plane_degamma_tf_property: Plane pre-defined transfer function to
+ * go from scanout/encoded values to linear values.
+ */
+ struct drm_property *plane_degamma_tf_property;
+ /**
+ * @plane_hdr_mult_property:
+ */
+ struct drm_property *plane_hdr_mult_property;
+
+ struct drm_property *plane_ctm_property;
+ /**
+ * @shaper_lut_property: Plane property to set pre-blending shaper LUT
+ * that converts color content before 3D LUT. If
+ * plane_shaper_tf_property != Identity TF, AMD color module will
+ * combine the user LUT values with pre-defined TF into the LUT
+ * parameters to be programmed.
+ */
+ struct drm_property *plane_shaper_lut_property;
+ /**
+ * @shaper_lut_size_property: Plane property for the size of
+ * pre-blending shaper LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_shaper_lut_size_property;
+ /**
+ * @plane_shaper_tf_property: Plane property to set a predefined
+ * transfer function for pre-blending shaper (before applying 3D LUT)
+ * with or without LUT. There is no shaper ROM, but we can use AMD
+ * color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *plane_shaper_tf_property;
+ /**
+ * @plane_lut3d_property: Plane property for color transformation using
+ * a 3D LUT (pre-blending), a three-dimensional array where each
+ * element is an RGB triplet. Each dimension has the size of
+ * lut3d_size. The array contains samples from the approximated
+ * function. On AMD, values between samples are estimated by
+ * tetrahedral interpolation. The array is accessed with three indices,
+ * one for each input dimension (color channel), blue being the
+ * outermost dimension, red the innermost.
+ */
+ struct drm_property *plane_lut3d_property;
+ /**
+ * @plane_lut3d_size_property: Plane property to define the max
+ * size of 3D LUT as supported by the driver (read-only). The max size
+ * is the max size of one dimension and, therefore, the max number of
+ * entries for the 3D LUT array is the 3D LUT size cubed.
+ */
+ struct drm_property *plane_lut3d_size_property;
+ /**
+ * @plane_blend_lut_property: Plane property for output gamma before
+ * blending. Userspace set a blend LUT to convert colors after 3D LUT
+ * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they
+ * are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property
+ * != Identity TF, AMD color module will combine the user LUT values
+ * with pre-defined TF into the LUT parameters to be programmed.
+ */
+ struct drm_property *plane_blend_lut_property;
+ /**
+ * @plane_blend_lut_size_property: Plane property to define the max
+ * size of blend LUT as supported by the driver (read-only).
+ */
+ struct drm_property *plane_blend_lut_size_property;
+ /**
+ * @plane_blend_tf_property: Plane property to set a predefined
+ * transfer function for pre-blending blend/out_gamma (after applying
+ * 3D LUT) with or without LUT. There is no blend ROM, but we can use
+ * AMD color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *plane_blend_tf_property;
+ /* @regamma_tf_property: Transfer function for CRTC regamma
+ * (post-blending). Possible values are defined by `enum
+ * amdgpu_transfer_function`. There is no regamma ROM, but we can use
+ * AMD color modules to program LUT parameters from predefined TF (or
+ * from a combination of pre-defined TF and the custom 1D LUT).
+ */
+ struct drm_property *regamma_tf_property;
+};
+
+#define AMDGPU_MAX_BL_LEVEL 0xFF
+
+struct amdgpu_backlight_privdata {
+ struct amdgpu_encoder *encoder;
+ uint8_t negative;
+};
+
+struct amdgpu_atom_ss {
+ uint16_t percentage;
+ uint16_t percentage_divider;
+ uint8_t type;
+ uint16_t step;
+ uint8_t delay;
+ uint8_t range;
+ uint8_t refdiv;
+ /* asic_ss */
+ uint16_t rate;
+ uint16_t amount;
+};
+
+struct amdgpu_crtc {
+ struct drm_crtc base;
+ int crtc_id;
+ bool enabled;
+ bool can_tile;
+ uint32_t crtc_offset;
+ struct drm_gem_object *cursor_bo;
+ uint64_t cursor_addr;
+ int cursor_x;
+ int cursor_y;
+ int cursor_hot_x;
+ int cursor_hot_y;
+ int cursor_width;
+ int cursor_height;
+ int max_cursor_width;
+ int max_cursor_height;
+ enum amdgpu_rmx_type rmx_type;
+ u8 h_border;
+ u8 v_border;
+ fixed20_12 vsc;
+ fixed20_12 hsc;
+ struct drm_display_mode native_mode;
+ u32 pll_id;
+ /* page flipping */
+ struct amdgpu_flip_work *pflip_works;
+ enum amdgpu_flip_status pflip_status;
+ int deferred_flip_completion;
+ /* parameters access from DM IRQ handler */
+ struct dm_irq_params dm_irq_params;
+ /* pll sharing */
+ struct amdgpu_atom_ss ss;
+ bool ss_enabled;
+ u32 adjusted_clock;
+ int bpc;
+ u32 pll_reference_div;
+ u32 pll_post_div;
+ u32 pll_flags;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ /* for dpm */
+ u32 line_time;
+ u32 lb_vblank_lead_lines;
+ struct drm_display_mode hw_mode;
+ /* for virtual dce */
+ struct hrtimer vblank_timer;
+ enum amdgpu_interrupt_state vsync_timer_enabled;
+
+ int otg_inst;
+ struct drm_pending_vblank_event *event;
+
+ bool wb_pending;
+ bool wb_enabled;
+ struct drm_writeback_connector *wb_conn;
+};
+
+struct amdgpu_encoder_atom_dig {
+ bool linkb;
+ /* atom dig */
+ bool coherent_mode;
+ int dig_encoder; /* -1 disabled, 0 DIGA, 1 DIGB, etc. */
+ /* atom lvds/edp */
+ uint32_t lcd_misc;
+ uint16_t panel_pwr_delay;
+ uint32_t lcd_ss_id;
+ /* panel mode */
+ struct drm_display_mode native_mode;
+ struct backlight_device *bl_dev;
+ int dpms_mode;
+ uint8_t backlight_level;
+ int panel_mode;
+ struct amdgpu_afmt *afmt;
+};
+
+struct amdgpu_encoder {
+ struct drm_encoder base;
+ uint32_t encoder_enum;
+ uint32_t encoder_id;
+ uint32_t devices;
+ uint32_t active_device;
+ uint32_t flags;
+ uint32_t pixel_clock;
+ enum amdgpu_rmx_type rmx_type;
+ enum amdgpu_underscan_type underscan_type;
+ uint32_t underscan_hborder;
+ uint32_t underscan_vborder;
+ struct drm_display_mode native_mode;
+ void *enc_priv;
+ int audio_polling_active;
+ bool is_ext_encoder;
+ u16 caps;
+};
+
+struct amdgpu_connector_atom_dig {
+ /* displayport */
+ u8 dpcd[DP_RECEIVER_CAP_SIZE];
+ u8 downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
+ u8 dp_sink_type;
+ int dp_clock;
+ int dp_lane_count;
+ bool edp_on;
+};
+
+struct amdgpu_gpio_rec {
+ bool valid;
+ u8 id;
+ u32 reg;
+ u32 mask;
+ u32 shift;
+};
+
+struct amdgpu_hpd {
+ enum amdgpu_hpd_id hpd;
+ u8 plugged_state;
+ struct amdgpu_gpio_rec gpio;
+};
+
+struct amdgpu_router {
+ u32 router_id;
+ struct amdgpu_i2c_bus_rec i2c_info;
+ u8 i2c_addr;
+ /* i2c mux */
+ bool ddc_valid;
+ u8 ddc_mux_type;
+ u8 ddc_mux_control_pin;
+ u8 ddc_mux_state;
+ /* clock/data mux */
+ bool cd_valid;
+ u8 cd_mux_type;
+ u8 cd_mux_control_pin;
+ u8 cd_mux_state;
+};
+
+enum amdgpu_connector_audio {
+ AMDGPU_AUDIO_DISABLE = 0,
+ AMDGPU_AUDIO_ENABLE = 1,
+ AMDGPU_AUDIO_AUTO = 2
+};
+
+enum amdgpu_connector_dither {
+ AMDGPU_FMT_DITHER_DISABLE = 0,
+ AMDGPU_FMT_DITHER_ENABLE = 1,
+};
+
+struct amdgpu_dm_dp_aux {
+ struct drm_dp_aux aux;
+ struct ddc_service *ddc_service;
+};
+
+struct amdgpu_i2c_adapter {
+ struct i2c_adapter base;
+
+ struct ddc_service *ddc_service;
+ bool oem;
+};
+
+#define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux)
+
+struct amdgpu_connector {
+ struct drm_connector base;
+ uint32_t connector_id;
+ uint32_t devices;
+ struct amdgpu_i2c_chan *ddc_bus;
+ /* some systems have an hdmi and vga port with a shared ddc line */
+ bool shared_ddc;
+ bool use_digital;
+ /* we need to keep track of the EDID between detect
+ and get_modes due to analog/digital/tvencoder handling */
+ struct edid *edid;
+ void *con_priv;
+ bool dac_load_detect;
+ bool detected_by_load; /* if the connection status was determined by load */
+ bool detected_hpd_without_ddc; /* if an HPD signal was detected on DVI, but ddc probing failed */
+ uint16_t connector_object_id;
+ struct amdgpu_hpd hpd;
+ struct amdgpu_router router;
+ struct amdgpu_i2c_chan *router_bus;
+ enum amdgpu_connector_audio audio;
+ enum amdgpu_connector_dither dither;
+ unsigned pixelclock_for_modeset;
+};
+
+/* TODO: start to use this struct and remove same field from base one */
+struct amdgpu_mst_connector {
+ struct amdgpu_connector base;
+
+ struct drm_dp_mst_topology_mgr mst_mgr;
+ struct amdgpu_dm_dp_aux dm_dp_aux;
+ struct drm_dp_mst_port *mst_output_port;
+ struct amdgpu_connector *mst_root;
+ bool is_mst_connector;
+ struct amdgpu_encoder *mst_encoder;
+};
+
+#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
+ ((em) == ATOM_ENCODER_MODE_DP_MST))
+
+/* Driver internal use only flags of amdgpu_display_get_crtc_scanoutpos() */
+#define DRM_SCANOUTPOS_VALID (1 << 0)
+#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1)
+#define DRM_SCANOUTPOS_ACCURATE (1 << 2)
+#define USE_REAL_VBLANKSTART (1 << 30)
+#define GET_DISTANCE_TO_VBLANKSTART (1 << 31)
+
+void amdgpu_link_encoder_connector(struct drm_device *dev);
+
+struct drm_connector *
+amdgpu_get_connector_for_encoder(struct drm_encoder *encoder);
+struct drm_connector *
+amdgpu_get_connector_for_encoder_init(struct drm_encoder *encoder);
+bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
+ u32 pixel_clock);
+
+u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder);
+struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder);
+
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+ bool use_aux);
+
+void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
+
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+ unsigned int pipe, unsigned int flags, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode);
+
+int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
+
+void amdgpu_enc_destroy(struct drm_encoder *encoder);
+void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);
+void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
+ struct drm_display_mode *adjusted_mode);
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
+
+bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
+ bool in_vblank_irq, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode);
+
+/* amdgpu_display.c */
+void amdgpu_display_print_display_setup(struct drm_device *dev);
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+ struct drm_modeset_acquire_ctx *ctx);
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t page_flip_flags, uint32_t target,
+ struct drm_modeset_acquire_ctx *ctx);
+extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
new file mode 100644
index 000000000000..a974265837f0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_nbio_ras *ras;
+
+ if (!adev->nbio.ras)
+ return 0;
+
+ ras = adev->nbio.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register pcie_bif ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "pcie_bif");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__PCIE_BIF;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->nbio.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ if (adev->nbio.funcs && adev->nbio.funcs->get_pcie_replay_count)
+ return adev->nbio.funcs->get_pcie_replay_count(adev);
+
+ return 0;
+}
+
+bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev) || !adev->asic_funcs ||
+ !adev->asic_funcs->get_pcie_replay_count ||
+ (!adev->nbio.funcs || !adev->nbio.funcs->get_pcie_replay_count))
+ return false;
+
+ return true;
+}
+
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
+ if (r)
+ goto late_fini;
+ r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ return 0;
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
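+
+/*
+ * A minimal usage sketch, assuming an ASIC-specific ras object (the name
+ * nbio_v7_4_ras below is illustrative) and the usual sw_init/late_init split:
+ *
+ *   adev->nbio.ras = &nbio_v7_4_ras;
+ *   r = amdgpu_nbio_ras_sw_init(adev);
+ *   if (r)
+ *       return r;
+ *   // ... later, once interrupt sources are available:
+ *   r = amdgpu_nbio_ras_late_init(adev, adev->nbio.ras_if);
+ */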
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
new file mode 100644
index 000000000000..b528de6a01f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_NBIO_H__
+#define __AMDGPU_NBIO_H__
+
+/*
+ * amdgpu nbio functions
+ */
+struct nbio_hdp_flush_reg {
+ u32 ref_and_mask_cp0;
+ u32 ref_and_mask_cp1;
+ u32 ref_and_mask_cp2;
+ u32 ref_and_mask_cp3;
+ u32 ref_and_mask_cp4;
+ u32 ref_and_mask_cp5;
+ u32 ref_and_mask_cp6;
+ u32 ref_and_mask_cp7;
+ u32 ref_and_mask_cp8;
+ u32 ref_and_mask_cp9;
+ u32 ref_and_mask_sdma0;
+ u32 ref_and_mask_sdma1;
+ u32 ref_and_mask_sdma2;
+ u32 ref_and_mask_sdma3;
+ u32 ref_and_mask_sdma4;
+ u32 ref_and_mask_sdma5;
+ u32 ref_and_mask_sdma6;
+ u32 ref_and_mask_sdma7;
+};
+
+struct amdgpu_nbio_ras {
+ struct amdgpu_ras_block_object ras_block;
+ void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev);
+ void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev);
+ int (*init_ras_controller_interrupt)(struct amdgpu_device *adev);
+ int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_nbio_funcs {
+ const struct nbio_hdp_flush_reg *hdp_flush_reg;
+ u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
+ u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_index_hi_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev);
+ u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev);
+ u32 (*get_rev_id)(struct amdgpu_device *adev);
+ void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
+ u32 (*get_memsize)(struct amdgpu_device *adev);
+ void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vpe_doorbell_range)(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance);
+ void (*gc_doorbell_init)(struct amdgpu_device *adev);
+ void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
+ bool enable);
+ void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
+ bool enable);
+ void (*ih_doorbell_range)(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index);
+ void (*enable_doorbell_interrupt)(struct amdgpu_device *adev,
+ bool enable);
+ void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+ bool enable);
+ void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
+ bool enable);
+ void (*get_clockgating_state)(struct amdgpu_device *adev,
+ u64 *flags);
+ void (*ih_control)(struct amdgpu_device *adev);
+ void (*init_registers)(struct amdgpu_device *adev);
+ void (*remap_hdp_registers)(struct amdgpu_device *adev);
+ void (*enable_aspm)(struct amdgpu_device *adev,
+ bool enable);
+ void (*program_aspm)(struct amdgpu_device *adev);
+ void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
+ void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
+ void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
+ u32 (*get_rom_offset)(struct amdgpu_device *adev);
+ int (*get_compute_partition_mode)(struct amdgpu_device *adev);
+ u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
+ u32 *supp_modes);
+ bool (*is_nps_switch_requested)(struct amdgpu_device *adev);
+ u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
+ void (*set_reg_remap)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_nbio {
+ const struct nbio_hdp_flush_reg *hdp_flush_reg;
+ struct amdgpu_irq_src ras_controller_irq;
+ struct amdgpu_irq_src ras_err_event_athub_irq;
+ struct ras_common_if *ras_if;
+ const struct amdgpu_nbio_funcs *funcs;
+ struct amdgpu_nbio_ras *ras;
+};
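+
+/*
+ * Dispatch goes through the funcs table with a NULL check, mirroring
+ * amdgpu_nbio_get_pcie_replay_count(); a hedged sketch:
+ *
+ *   u32 memsize = 0;
+ *
+ *   if (adev->nbio.funcs && adev->nbio.funcs->get_memsize)
+ *       memsize = adev->nbio.funcs->get_memsize(adev);
+ */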
+
+int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev);
+
+bool amdgpu_nbio_is_replay_cnt_supported(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
new file mode 100644
index 000000000000..e08f58de4b17
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -0,0 +1,1688 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ * Dave Airlie
+ */
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/dma-buf.h>
+#include <linux/export.h>
+
+#include <drm/drm_drv.h>
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_cache.h>
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_dma_buf.h"
+
+/**
+ * DOC: amdgpu_object
+ *
+ * This defines the interfaces to operate on an &amdgpu_bo buffer object which
+ * represents memory used by the driver (VRAM, system memory, etc.). The driver
+ * provides DRM/GEM APIs to userspace, which in turn use these interfaces to
+ * create/destroy/set up buffer objects that are then managed by the kernel TTM
+ * memory manager.
+ * The interfaces are also used internally by kernel clients, including gfx,
+ * uvd, etc. for kernel managed allocations used by the GPU.
+ *
+ */
+
+static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+
+ amdgpu_bo_kunmap(bo);
+
+ if (drm_gem_is_imported(&bo->tbo.base))
+ drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
+ drm_gem_object_release(&bo->tbo.base);
+ amdgpu_bo_unref(&bo->parent);
+ kvfree(bo);
+}
+
+static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ struct amdgpu_bo_user *ubo;
+
+ ubo = to_amdgpu_bo_user(bo);
+ kfree(ubo->metadata);
+ amdgpu_bo_destroy(tbo);
+}
+
+/**
+ * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
+ * @bo: buffer object to be checked
+ *
+ * Uses destroy function associated with the object to determine if this is
+ * an &amdgpu_bo.
+ *
+ * Returns:
+ * true if the object belongs to &amdgpu_bo, false if not.
+ */
+bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
+{
+ if (bo->destroy == &amdgpu_bo_destroy ||
+ bo->destroy == &amdgpu_bo_user_destroy)
+ return true;
+
+ return false;
+}
+
+/**
+ * amdgpu_bo_placement_from_domain - set buffer's placement
+ * @abo: &amdgpu_bo buffer object whose placement is to be set
+ * @domain: requested domain
+ *
+ * Sets buffer's placement according to requested domain and the buffer's
+ * flags.
+ */
+void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct ttm_placement *placement = &abo->placement;
+ struct ttm_place *places = abo->placements;
+ u64 flags = abo->flags;
+ u32 c = 0;
+
+ if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+ unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+
+ if (adev->gmc.mem_partitions && mem_id >= 0) {
+ places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn;
+ /*
+ * memory partition range lpfn is inclusive start + size - 1
+ * TTM place lpfn is exclusive start + size
+ */
+ places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1;
+ } else {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ }
+ places[c].mem_type = TTM_PL_VRAM;
+ places[c].flags = 0;
+
+ if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+ places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn);
+ else
+ places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+ if (abo->tbo.type == ttm_bo_type_kernel &&
+ flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_DOORBELL;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_MMIO_REMAP) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_MMIO_REMAP;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_GTT) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type =
+ abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
+ AMDGPU_PL_PREEMPT : TTM_PL_TT;
+ places[c].flags = 0;
+ /*
+ * When GTT is just an alternative to VRAM make sure that we
+ * only use it as fallback and still try to fill up VRAM first.
+ */
+ if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) &&
+ domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+ places[c].flags |= TTM_PL_FLAG_FALLBACK;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_CPU) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = TTM_PL_SYSTEM;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_GDS) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_GDS;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_GWS) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_GWS;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (domain & AMDGPU_GEM_DOMAIN_OA) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = AMDGPU_PL_OA;
+ places[c].flags = 0;
+ c++;
+ }
+
+ if (!c) {
+ places[c].fpfn = 0;
+ places[c].lpfn = 0;
+ places[c].mem_type = TTM_PL_SYSTEM;
+ places[c].flags = 0;
+ c++;
+ }
+
+ BUG_ON(c > AMDGPU_BO_MAX_PLACEMENTS);
+
+ placement->num_placement = c;
+ placement->placement = places;
+}
+
+/**
+ * amdgpu_bo_create_reserved - create reserved BO for kernel use
+ *
+ * @adev: amdgpu device object
+ * @size: size for the new BO
+ * @align: alignment for the new BO
+ * @domain: where to place it
+ * @bo_ptr: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO for kernel internal use, and returns it still
+ * reserved.
+ *
+ * Note: a new BO is created only if *bo_ptr is NULL.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr)
+{
+ struct amdgpu_bo_param bp;
+ bool free = false;
+ int r;
+
+ if (!size) {
+ amdgpu_bo_unref(bo_ptr);
+ return 0;
+ }
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = align;
+ bp.domain = domain;
+ bp.flags = cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
+ : AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ if (!*bo_ptr) {
+ r = amdgpu_bo_create(adev, &bp, bo_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
+ r);
+ return r;
+ }
+ free = true;
+ }
+
+ r = amdgpu_bo_reserve(*bo_ptr, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r);
+ goto error_free;
+ }
+
+ r = amdgpu_bo_pin(*bo_ptr, domain);
+ if (r) {
+ dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
+ goto error_unreserve;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(*bo_ptr)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo_ptr);
+ goto error_unpin;
+ }
+
+ if (gpu_addr)
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
+
+ if (cpu_addr) {
+ r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
+ goto error_unpin;
+ }
+ }
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(*bo_ptr);
+error_unreserve:
+ amdgpu_bo_unreserve(*bo_ptr);
+
+error_free:
+ if (free)
+ amdgpu_bo_unref(bo_ptr);
+
+ return r;
+}
+
+/**
+ * amdgpu_bo_create_kernel - create BO for kernel use
+ *
+ * @adev: amdgpu device object
+ * @size: size for the new BO
+ * @align: alignment for the new BO
+ * @domain: where to place it
+ * @bo_ptr: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO for kernel internal use.
+ *
+ * This function is exported to allow the V4L2 isp device, which is external
+ * to the drm device, to create and access the kernel BO.
+ *
+ * Note: a new BO is created only if *bo_ptr is NULL.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr)
+{
+ int r;
+
+ r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr,
+ gpu_addr, cpu_addr);
+
+ if (r)
+ return r;
+
+ if (*bo_ptr)
+ amdgpu_bo_unreserve(*bo_ptr);
+
+ return 0;
+}
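+
+/*
+ * A minimal create/free pairing sketch (assumes a valid adev; the domain and
+ * size are illustrative):
+ *
+ *   struct amdgpu_bo *bo = NULL;
+ *   u64 gpu_addr;
+ *   void *cpu_ptr;
+ *   int r;
+ *
+ *   r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ *                               AMDGPU_GEM_DOMAIN_VRAM, &bo,
+ *                               &gpu_addr, &cpu_ptr);
+ *   if (!r) {
+ *       // use gpu_addr / cpu_ptr ...
+ *       amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_ptr);
+ *   }
+ */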
+
+/**
+ * amdgpu_bo_create_isp_user - create user BO for isp
+ *
+ * @adev: amdgpu device object
+ * @dma_buf: DMABUF handle for isp buffer
+ * @domain: where to place it
+ * @bo: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ *
+ * Imports the isp DMABUF to allocate and pin a user BO for isp internal use. It
+ * allocates GART space to generate a gpu_addr for the BO so that it is
+ * accessible through the GART aperture for the ISP HW.
+ *
+ * This function is exported to allow the V4L2 isp device, which is external to
+ * the drm device, to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo,
+ u64 *gpu_addr)
+
+{
+ struct drm_gem_object *gem_obj;
+ int r;
+
+ gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf);
+ *bo = gem_to_amdgpu_bo(gem_obj);
+ if (!(*bo)) {
+ dev_err(adev->dev, "failed to get valid isp user bo\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_reserve(*bo, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_pin(*bo, domain);
+ if (r) {
+ dev_err(adev->dev, "(%d) isp user bo pin failed\n", r);
+ goto error_unreserve;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(*bo)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo);
+ goto error_unpin;
+ }
+
+ if (!WARN_ON(!gpu_addr))
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo);
+
+ amdgpu_bo_unreserve(*bo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(*bo);
+error_unreserve:
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
+
+ return r;
+}
+
+/**
+ * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
+ *
+ * @adev: amdgpu device object
+ * @offset: offset of the BO
+ * @size: size of the BO
+ * @bo_ptr: used to initialize BOs in structures
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Creates a kernel BO at a specific offset in VRAM.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size,
+ struct amdgpu_bo **bo_ptr, void **cpu_addr)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ unsigned int i;
+ int r;
+
+ offset &= PAGE_MASK;
+ size = ALIGN(size, PAGE_SIZE);
+
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+ cpu_addr);
+ if (r)
+ return r;
+
+ if ((*bo_ptr) == NULL)
+ return 0;
+
+ /*
+ * Remove the original mem node and create a new one at the requested
+ * position.
+ */
+ if (cpu_addr)
+ amdgpu_bo_kunmap(*bo_ptr);
+
+ ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
+
+ for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
+ (*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
+ (*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+ }
+ r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
+ &(*bo_ptr)->tbo.resource, &ctx);
+ if (r)
+ goto error;
+
+ if (cpu_addr) {
+ r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
+ if (r)
+ goto error;
+ }
+
+ amdgpu_bo_unreserve(*bo_ptr);
+ return 0;
+
+error:
+ amdgpu_bo_unreserve(*bo_ptr);
+ amdgpu_bo_unref(bo_ptr);
+ return r;
+}
+
+/**
+ * amdgpu_bo_free_kernel - free BO for kernel use
+ *
+ * @bo: amdgpu BO to free
+ * @gpu_addr: pointer to where the BO's GPU memory space address was stored
+ * @cpu_addr: pointer to where the BO's CPU memory space address was stored
+ *
+ * Unmaps and unpins a BO for kernel internal use.
+ *
+ * This function is exported to allow the V4L2 isp device, which is external
+ * to the drm device, to free the kernel BO.
+ */
+void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
+ void **cpu_addr)
+{
+ if (*bo == NULL)
+ return;
+
+ WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+
+ if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
+ if (cpu_addr)
+ amdgpu_bo_kunmap(*bo);
+
+ amdgpu_bo_unpin(*bo);
+ amdgpu_bo_unreserve(*bo);
+ }
+ amdgpu_bo_unref(bo);
+
+ if (gpu_addr)
+ *gpu_addr = 0;
+
+ if (cpu_addr)
+ *cpu_addr = NULL;
+}
+
+/**
+ * amdgpu_bo_free_isp_user - free BO for isp use
+ *
+ * @bo: amdgpu isp user BO to free
+ *
+ * Unpins and unrefs the BO for isp internal use.
+ *
+ * This function is exported to allow the V4L2 isp device, which is external
+ * to the drm device, to free the isp user BO.
+ */
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo)
+{
+ if (bo == NULL)
+ return;
+
+ if (amdgpu_bo_reserve(bo, true) == 0) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ }
+ amdgpu_bo_unref(&bo);
+}
+
+/* Validate that the BO size fits within the total size of the requested domain */
+static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
+ unsigned long size, u32 domain)
+{
+ struct ttm_resource_manager *man = NULL;
+
+ /*
+ * If GTT is part of the requested domains, the check must succeed to
+ * allow falling back to GTT.
+ */
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ else if (domain & AMDGPU_GEM_DOMAIN_VRAM)
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ else
+ return true;
+
+ if (!man) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
+ return false;
+ }
+
+ /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */
+ if (size < man->size)
+ return true;
+
+ DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size);
+ return false;
+}
+
+bool amdgpu_bo_support_uswc(u64 bo_flags)
+{
+
+#ifdef CONFIG_X86_32
+ /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
+ */
+ return false;
+#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
+ /* Don't try to enable write-combining when it can't work, or things
+ * may be slow
+ * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
+ */
+
+#ifndef CONFIG_COMPILE_TEST
+#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
+ thanks to write-combining
+#endif
+
+ if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+ DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
+ "better performance thanks to write-combining\n");
+ return false;
+#else
+ /* For architectures that don't support WC memory,
+ * mask out the WC flag from the BO
+ */
+ if (!drm_arch_can_wc_memory())
+ return false;
+
+ return true;
+#endif
+}
+
+/**
+ * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @bo_ptr: pointer to the buffer object pointer
+ *
+ * Creates an &amdgpu_bo buffer object.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo **bo_ptr)
+{
+ struct ttm_operation_ctx ctx = {
+ .interruptible = (bp->type != ttm_bo_type_kernel),
+ .no_wait_gpu = bp->no_wait_gpu,
+ /* We opt to avoid OOM on system pages allocations */
+ .gfp_retry_mayfail = true,
+ .allow_res_evict = bp->type != ttm_bo_type_kernel,
+ .resv = bp->resv
+ };
+ struct amdgpu_bo *bo;
+ unsigned long page_align, size = bp->size;
+ int r;
+
+ /* Note that GDS/GWS/OA allocates 1 page per byte/resource. */
+ if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
+ /* GWS and OA don't need any alignment. */
+ page_align = bp->byte_align;
+ size <<= PAGE_SHIFT;
+
+ } else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
+ /* Both size and alignment must be a multiple of 4. */
+ page_align = ALIGN(bp->byte_align, 4);
+ size = ALIGN(size, 4) << PAGE_SHIFT;
+ } else {
+ /* Memory should be aligned at least to a page size. */
+ page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
+ size = ALIGN(size, PAGE_SIZE);
+ }
+
+ if (!amdgpu_bo_validate_size(adev, size, bp->domain))
+ return -ENOMEM;
+
+ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo));
+
+ *bo_ptr = NULL;
+ bo = kvzalloc(bp->bo_ptr_size, GFP_KERNEL);
+ if (bo == NULL)
+ return -ENOMEM;
+ drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
+ bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
+ bo->vm_bo = NULL;
+ bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
+ bp->domain;
+ bo->allowed_domains = bo->preferred_domains;
+ if (bp->type != ttm_bo_type_kernel &&
+ !(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) &&
+ bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+ bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
+
+ bo->flags = bp->flags;
+
+ if (adev->gmc.mem_partitions)
+ /* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */
+ bo->xcp_id = bp->xcp_id_plus1 - 1;
+ else
+ /* For GPUs without spatial partitioning */
+ bo->xcp_id = 0;
+
+ if (!amdgpu_bo_support_uswc(bo->flags))
+ bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ bo->tbo.bdev = &adev->mman.bdev;
+ if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
+ AMDGPU_GEM_DOMAIN_GDS))
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ else
+ amdgpu_bo_placement_from_domain(bo, bp->domain);
+ if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 2;
+ else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE))
+ bo->tbo.priority = 1;
+
+ if (!bp->destroy)
+ bp->destroy = &amdgpu_bo_destroy;
+
+ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, bp->type,
+ &bo->placement, page_align, &ctx, NULL,
+ bp->resv, bp->destroy);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
+ ctx.bytes_moved);
+ else
+ amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
+
+ if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
+ bo->tbo.resource->mem_type == TTM_PL_VRAM) {
+ struct dma_fence *fence;
+
+ r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
+ if (unlikely(r))
+ goto fail_unreserve;
+
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+ }
+ if (!bp->resv)
+ amdgpu_bo_unreserve(bo);
+ *bo_ptr = bo;
+
+ trace_amdgpu_bo_create(bo);
+
+ /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
+ if (bp->type == ttm_bo_type_device)
+ bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ return 0;
+
+fail_unreserve:
+ if (!bp->resv)
+ dma_resv_unlock(bo->tbo.base.resv);
+ amdgpu_bo_unref(&bo);
+ return r;
+}
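+
+/*
+ * A minimal usage sketch; the size, domain and flags below are illustrative,
+ * not prescriptive:
+ *
+ *   struct amdgpu_bo_param bp;
+ *   struct amdgpu_bo *bo;
+ *   int r;
+ *
+ *   memset(&bp, 0, sizeof(bp));
+ *   bp.size = PAGE_SIZE;
+ *   bp.byte_align = PAGE_SIZE;
+ *   bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ *   bp.type = ttm_bo_type_kernel;
+ *   bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+ *   r = amdgpu_bo_create(adev, &bp, &bo);
+ *   if (!r)
+ *       amdgpu_bo_unref(&bo);
+ */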
+
+/**
+ * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @ubo_ptr: pointer to the buffer object pointer
+ *
+ * Create a BO to be used by a user application.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+
+int amdgpu_bo_create_user(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_user **ubo_ptr)
+{
+ struct amdgpu_bo *bo_ptr;
+ int r;
+
+ bp->bo_ptr_size = sizeof(struct amdgpu_bo_user);
+ bp->destroy = &amdgpu_bo_user_destroy;
+ r = amdgpu_bo_create(adev, bp, &bo_ptr);
+ if (r)
+ return r;
+
+ *ubo_ptr = to_amdgpu_bo_user(bo_ptr);
+ return r;
+}
+
+/**
+ * amdgpu_bo_create_vm - create an &amdgpu_bo_vm buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @vmbo_ptr: pointer to the buffer object pointer
+ *
+ * Create a BO to be used by GPUVM.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+
+int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_vm **vmbo_ptr)
+{
+ struct amdgpu_bo *bo_ptr;
+ int r;
+
+ /* bo_ptr_size will be determined by the caller and it depends on
+ * the number of amdgpu_vm_pt entries.
+ */
+ BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo_vm));
+ r = amdgpu_bo_create(adev, bp, &bo_ptr);
+ if (r)
+ return r;
+
+ *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr);
+ return r;
+}
+
+/**
+ * amdgpu_bo_kmap - map an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be mapped
+ * @ptr: kernel virtual address to be returned
+ *
+ * Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls
+ * amdgpu_bo_kptr() to get the kernel virtual address.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
+{
+ void *kptr;
+ long r;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+ return -EPERM;
+
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
+ kptr = amdgpu_bo_kptr(bo);
+ if (kptr) {
+ if (ptr)
+ *ptr = kptr;
+ return 0;
+ }
+
+ r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap);
+ if (r)
+ return r;
+
+ if (ptr)
+ *ptr = amdgpu_bo_kptr(bo);
+
+ return 0;
+}
+
+/**
+ * amdgpu_bo_kptr - returns a kernel virtual address of the buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * Calls ttm_kmap_obj_virtual() to get the kernel virtual address
+ *
+ * Returns:
+ * the virtual address of a buffer object area.
+ */
+void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
+{
+ bool is_iomem;
+
+ return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+}
+
+/**
+ * amdgpu_bo_kunmap - unmap an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be unmapped
+ *
+ * Unmaps a kernel map set up by amdgpu_bo_kmap().
+ */
+void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
+{
+ if (bo->kmap.bo)
+ ttm_bo_kunmap(&bo->kmap);
+}
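+
+/*
+ * A hedged kmap/kunmap sketch; assumes the caller already owns a BO that
+ * allows CPU access and can reserve it:
+ *
+ *   void *ptr;
+ *
+ *   if (!amdgpu_bo_reserve(bo, false)) {
+ *       if (!amdgpu_bo_kmap(bo, &ptr)) {
+ *           // CPU reads/writes through ptr ...
+ *           amdgpu_bo_kunmap(bo);
+ *       }
+ *       amdgpu_bo_unreserve(bo);
+ *   }
+ */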
+
+/**
+ * amdgpu_bo_ref - reference an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * References the contained &ttm_buffer_object.
+ *
+ * Returns:
+ * a refcounted pointer to the &amdgpu_bo buffer object.
+ */
+struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
+{
+ if (bo == NULL)
+ return NULL;
+
+ drm_gem_object_get(&bo->tbo.base);
+ return bo;
+}
+
+/**
+ * amdgpu_bo_unref - unreference an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * Unreferences the contained &ttm_buffer_object and clears the pointer.
+ */
+void amdgpu_bo_unref(struct amdgpu_bo **bo)
+{
+ if ((*bo) == NULL)
+ return;
+
+ drm_gem_object_put(&(*bo)->tbo.base);
+ *bo = NULL;
+}
+
+/**
+ * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be pinned
+ * @domain: domain to be pinned to
+ *
+ * Pins the buffer object according to the requested domain. If the memory is
+ * unbound GART memory, binds the pages into the GART table. Adjusts pin_count and
+ * pin_size accordingly.
+ *
+ * Pinning means to lock pages in memory along with keeping them at a fixed
+ * offset. It is required when a buffer can not be moved, for example, when
+ * a display buffer is being scanned out.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ int r, i;
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+ return -EPERM;
+
+ /* Check domain to be pinned to against preferred domains */
+ if (bo->preferred_domains & domain)
+ domain = bo->preferred_domains & domain;
+
+ /* A shared bo cannot be migrated to VRAM */
+ if (drm_gem_is_imported(&bo->tbo.base)) {
+ if (domain & AMDGPU_GEM_DOMAIN_GTT)
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ else
+ return -EINVAL;
+ }
+
+ if (bo->tbo.pin_count) {
+ uint32_t mem_type = bo->tbo.resource->mem_type;
+ uint32_t mem_flags = bo->tbo.resource->placement;
+
+ if (!(domain & amdgpu_mem_type_to_domain(mem_type)))
+ return -EINVAL;
+
+ if ((mem_type == TTM_PL_VRAM) &&
+ (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) &&
+ !(mem_flags & TTM_PL_FLAG_CONTIGUOUS))
+ return -EINVAL;
+
+ ttm_bo_pin(&bo->tbo);
+ return 0;
+ }
+
+ /* This assumes only APU display buffers are pinned with (VRAM|GTT).
+ * See function amdgpu_display_supported_domains()
+ */
+ domain = amdgpu_bo_get_preferred_domain(adev, domain);
+
+ if (drm_gem_is_imported(&bo->tbo.base))
+ dma_buf_pin(bo->tbo.base.import_attach);
+
+ /* force to pin into visible video ram */
+ if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ amdgpu_bo_placement_from_domain(bo, domain);
+ for (i = 0; i < bo->placement.num_placement; i++) {
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+ bo->placements[i].mem_type == TTM_PL_VRAM)
+ bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
+ }
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(r)) {
+ dev_err(adev->dev, "%p pin failed\n", bo);
+ goto error;
+ }
+
+ ttm_bo_pin(&bo->tbo);
+
+ if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
+ atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
+ } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
+ atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
+ }
+
+error:
+ return r;
+}
+
+/**
+ * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be unpinned
+ *
+ * Decreases the pin_count, and clears the flags if pin_count reaches 0.
+ * Changes placement and pin size accounting accordingly.
+ */
+void amdgpu_bo_unpin(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ ttm_bo_unpin(&bo->tbo);
+ if (bo->tbo.pin_count)
+ return;
+
+ if (drm_gem_is_imported(&bo->tbo.base))
+ dma_buf_unpin(bo->tbo.base.import_attach);
+
+ if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
+ } else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
+ }
+
+}
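+
+/*
+ * A pin/unpin sketch for a scanout-style buffer (hedged; error handling and
+ * the actual hw programming are elided):
+ *
+ *   if (!amdgpu_bo_reserve(bo, false)) {
+ *       if (!amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM)) {
+ *           u64 addr = amdgpu_bo_gpu_offset(bo);
+ *           // program addr into the display hw, and later:
+ *           amdgpu_bo_unpin(bo);
+ *       }
+ *       amdgpu_bo_unreserve(bo);
+ *   }
+ */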
+
+static const char * const amdgpu_vram_names[] = {
+ "UNKNOWN",
+ "GDDR1",
+ "DDR2",
+ "GDDR3",
+ "GDDR4",
+ "GDDR5",
+ "HBM",
+ "DDR3",
+ "DDR4",
+ "GDDR6",
+ "DDR5",
+ "LPDDR4",
+ "LPDDR5",
+ "HBM3E"
+};
+
+/**
+ * amdgpu_bo_init - initialize memory manager
+ * @adev: amdgpu device object
+ *
+ * Calls amdgpu_ttm_init() to initialize amdgpu memory manager.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_init(struct amdgpu_device *adev)
+{
+ /* On A+A platform, VRAM can be mapped as WB */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ /* reserve PAT memory space to WC for VRAM */
+ int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
+ adev->gmc.aper_size);
+
+ if (r) {
+ DRM_ERROR("Unable to set WC memtype for the aperture base\n");
+ return r;
+ }
+
+ /* Add an MTRR for the VRAM */
+ adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
+ adev->gmc.aper_size);
+ }
+
+ DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
+ adev->gmc.mc_vram_size >> 20,
+ (unsigned long long)adev->gmc.aper_size >> 20);
+ DRM_INFO("RAM width %dbits %s\n",
+ adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]);
+ return amdgpu_ttm_init(adev);
+}
+
+/**
+ * amdgpu_bo_fini - tear down memory manager
+ * @adev: amdgpu device object
+ *
+ * Reverses amdgpu_bo_init() to tear down memory manager.
+ */
+void amdgpu_bo_fini(struct amdgpu_device *adev)
+{
+ int idx;
+
+ amdgpu_ttm_fini(adev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
+ }
+ drm_dev_exit(idx);
+ }
+}
+
+/**
+ * amdgpu_bo_set_tiling_flags - set tiling flags
+ * @bo: &amdgpu_bo buffer object
+ * @tiling_flags: new flags
+ *
+ * Sets buffer object's tiling flags with the new one. Used by GEM ioctl or
+ * kernel driver to set the tiling flags on a buffer.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_bo_user *ubo;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ if (adev->family <= AMDGPU_FAMILY_CZ &&
+ AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
+ return -EINVAL;
+
+ ubo = to_amdgpu_bo_user(bo);
+ ubo->tiling_flags = tiling_flags;
+ return 0;
+}
+
+/**
+ * amdgpu_bo_get_tiling_flags - get tiling flags
+ * @bo: &amdgpu_bo buffer object
+ * @tiling_flags: returned flags
+ *
+ * Gets buffer object's tiling flags. Used by GEM ioctl or kernel driver to
+ * query the tiling flags on a buffer.
+ */
+void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
+{
+ struct amdgpu_bo_user *ubo;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ dma_resv_assert_held(bo->tbo.base.resv);
+ ubo = to_amdgpu_bo_user(bo);
+
+ if (tiling_flags)
+ *tiling_flags = ubo->tiling_flags;
+}
+
+/**
+ * amdgpu_bo_set_metadata - set metadata
+ * @bo: &amdgpu_bo buffer object
+ * @metadata: new metadata
+ * @metadata_size: size of the new metadata
+ * @flags: flags of the new metadata
+ *
+ * Sets buffer object's metadata, its size and flags.
+ * Used via GEM ioctl.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata,
+ u32 metadata_size, uint64_t flags)
+{
+ struct amdgpu_bo_user *ubo;
+ void *buffer;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ ubo = to_amdgpu_bo_user(bo);
+ if (!metadata_size) {
+ if (ubo->metadata_size) {
+ kfree(ubo->metadata);
+ ubo->metadata = NULL;
+ ubo->metadata_size = 0;
+ }
+ return 0;
+ }
+
+ if (metadata == NULL)
+ return -EINVAL;
+
+ buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
+ if (buffer == NULL)
+ return -ENOMEM;
+
+ kfree(ubo->metadata);
+ ubo->metadata_flags = flags;
+ ubo->metadata = buffer;
+ ubo->metadata_size = metadata_size;
+
+ return 0;
+}
+
+/**
+ * amdgpu_bo_get_metadata - get metadata
+ * @bo: &amdgpu_bo buffer object
+ * @buffer: returned metadata
+ * @buffer_size: size of the buffer
+ * @metadata_size: size of the returned metadata
+ * @flags: flags of the returned metadata
+ *
+ * Gets buffer object's metadata, its size and flags. buffer_size shall not be
+ * less than metadata_size.
+ * Used via GEM ioctl.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint64_t *flags)
+{
+ struct amdgpu_bo_user *ubo;
+
+ if (!buffer && !metadata_size)
+ return -EINVAL;
+
+ BUG_ON(bo->tbo.type == ttm_bo_type_kernel);
+ ubo = to_amdgpu_bo_user(bo);
+ if (metadata_size)
+ *metadata_size = ubo->metadata_size;
+
+ if (buffer) {
+ if (buffer_size < ubo->metadata_size)
+ return -EINVAL;
+
+ if (ubo->metadata_size)
+ memcpy(buffer, ubo->metadata, ubo->metadata_size);
+ }
+
+ if (flags)
+ *flags = ubo->metadata_flags;
+
+ return 0;
+}
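+
+/*
+ * A hedged set/get round trip (bo must be a user BO, see the BUG_ON above);
+ * the 4-byte blob and zero flags are purely illustrative:
+ *
+ *   u32 blob = 0x12345678;
+ *   char out[4];
+ *   u32 out_size;
+ *   u64 out_flags;
+ *
+ *   if (!amdgpu_bo_set_metadata(bo, &blob, sizeof(blob), 0))
+ *       amdgpu_bo_get_metadata(bo, out, sizeof(out), &out_size, &out_flags);
+ */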
+
+/**
+ * amdgpu_bo_move_notify - notification about a memory move
+ * @bo: pointer to a buffer object
+ * @evict: if this move is evicting the buffer from the graphics address space
+ * @new_mem: new resource for backing the BO
+ *
+ * Marks the corresponding &amdgpu_bo buffer object as invalid and also
+ * performs bookkeeping.
+ * This is the TTM driver callback which is called when TTM moves a buffer.
+ */
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
+ bool evict,
+ struct ttm_resource *new_mem)
+{
+ struct ttm_resource *old_mem = bo->resource;
+ struct amdgpu_bo *abo;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return;
+
+ abo = ttm_to_amdgpu_bo(bo);
+ amdgpu_vm_bo_move(abo, new_mem, evict);
+
+ amdgpu_bo_kunmap(abo);
+
+ if (abo->tbo.base.dma_buf && !drm_gem_is_imported(&abo->tbo.base) &&
+ old_mem && old_mem->mem_type != TTM_PL_SYSTEM)
+ dma_buf_move_notify(abo->tbo.base.dma_buf);
+
+ /* move_notify is called before move happens */
+ trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
+ old_mem ? old_mem->mem_type : -1);
+}
+
+/**
+ * amdgpu_bo_release_notify - notification about a BO being released
+ * @bo: pointer to a buffer object
+ *
+ * Wipes VRAM buffers whose contents should not be leaked before the
+ * memory is released.
+ */
+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct dma_fence *fence = NULL;
+ struct amdgpu_bo *abo;
+ int r;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return;
+
+ abo = ttm_to_amdgpu_bo(bo);
+
+ WARN_ON(abo->vm_bo);
+
+ if (abo->kfd_bo)
+ amdgpu_amdkfd_release_notify(abo);
+
+ /*
+ * We lock the private dma_resv object here and since the BO is about to
+ * be released nobody else should have a pointer to it.
+ * So when this locking here fails something is wrong with the reference
+ * counting.
+ */
+ if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv)))
+ return;
+
+ amdgpu_amdkfd_remove_all_eviction_fences(abo);
+
+ if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM ||
+ !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) ||
+ adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev)))
+ goto out;
+
+ r = dma_resv_reserve_fences(&bo->base._resv, 1);
+ if (r)
+ goto out;
+
+ r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true,
+ AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE);
+ if (WARN_ON(r))
+ goto out;
+
+ amdgpu_vram_mgr_set_cleared(bo->resource);
+ dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
+
+out:
+ dma_resv_unlock(&bo->base._resv);
+}
+
+/**
+ * amdgpu_bo_fault_reserve_notify - notification about a memory fault
+ * @bo: pointer to a buffer object
+ *
+ * Notifies the driver that we are taking a fault on this BO and have reserved
+ * it; also performs bookkeeping.
+ * TTM driver callback for dealing with vm faults.
+ *
+ * Returns:
+ * 0 on success or a VM_FAULT_* code on failure.
+ */
+vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ int r;
+
+ /* Remember that this BO was accessed by the CPU */
+ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ if (amdgpu_res_cpu_visible(adev, bo->resource))
+ return 0;
+
+ /* Can't move a pinned BO to visible VRAM */
+ if (abo->tbo.pin_count > 0)
+ return VM_FAULT_SIGBUS;
+
+ /* hurrah the memory is not visible ! */
+ atomic64_inc(&adev->num_vram_cpu_page_faults);
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT);
+
+ /* Avoid costly evictions; only set GTT as a busy placement */
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
+
+ r = ttm_bo_validate(bo, &abo->placement, &ctx);
+ if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
+ return VM_FAULT_NOPAGE;
+ else if (unlikely(r))
+ return VM_FAULT_SIGBUS;
+
+ /* this should never happen */
+ if (bo->resource->mem_type == TTM_PL_VRAM &&
+ !amdgpu_res_cpu_visible(adev, bo->resource))
+ return VM_FAULT_SIGBUS;
+
+ ttm_bo_move_to_lru_tail_unlocked(bo);
+ return 0;
+}
+
+/**
+ * amdgpu_bo_fence - add fence to buffer object
+ *
+ * @bo: buffer object in question
+ * @fence: fence to add
+ * @shared: true if fence should be added shared
+ *
+ */
+void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
+ bool shared)
+{
+ struct dma_resv *resv = bo->tbo.base.resv;
+ int r;
+
+ r = dma_resv_reserve_fences(resv, 1);
+ if (r) {
+ /* As last resort on OOM we block for the fence */
+ dma_fence_wait(fence, false);
+ return;
+ }
+
+ dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
+ DMA_RESV_USAGE_WRITE);
+}
+
+/**
+ * amdgpu_bo_sync_wait_resv - Wait for BO reservation fences
+ *
+ * @adev: amdgpu device pointer
+ * @resv: reservation object to sync to
+ * @sync_mode: synchronization mode
+ * @owner: fence owner
+ * @intr: Whether the wait is interruptible
+ *
+ * Extract the fences from the reservation object and waits for them to finish.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
+ enum amdgpu_sync_mode sync_mode, void *owner,
+ bool intr)
+{
+ struct amdgpu_sync sync;
+ int r;
+
+ amdgpu_sync_create(&sync);
+ amdgpu_sync_resv(adev, &sync, resv, sync_mode, owner);
+ r = amdgpu_sync_wait(&sync, intr);
+ amdgpu_sync_free(&sync);
+ return r;
+}
+
+/**
+ * amdgpu_bo_sync_wait - Wrapper for amdgpu_bo_sync_wait_resv
+ * @bo: buffer object to wait for
+ * @owner: fence owner
+ * @intr: Whether the wait is interruptible
+ *
+ * Wrapper to wait for fences in a BO.
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+ return amdgpu_bo_sync_wait_resv(adev, bo->tbo.base.resv,
+ AMDGPU_SYNC_NE_OWNER, owner, intr);
+}
+
+/**
+ * amdgpu_bo_gpu_offset - return GPU offset of bo
+ * @bo: amdgpu object for which we query the offset
+ *
+ * Note: the object should be either pinned or reserved when calling this
+ * function; it might be useful to add a check for this for debugging.
+ *
+ * Returns:
+ * current GPU offset of the object.
+ */
+u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
+{
+ WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_SYSTEM);
+ WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) &&
+ !bo->tbo.pin_count && bo->tbo.type != ttm_bo_type_kernel);
+ WARN_ON_ONCE(bo->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET);
+ WARN_ON_ONCE(bo->tbo.resource->mem_type == TTM_PL_VRAM &&
+ !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
+
+ return amdgpu_bo_gpu_offset_no_check(bo);
+}
+
+/**
+ * amdgpu_bo_fb_aper_addr - return FB aperture GPU offset of the VRAM bo
+ * @bo: amdgpu VRAM buffer object for which we query the offset
+ *
+ * Returns:
+ * current FB aperture GPU offset of the object.
+ */
+u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t offset, fb_base;
+
+ WARN_ON_ONCE(bo->tbo.resource->mem_type != TTM_PL_VRAM);
+
+ fb_base = adev->gmc.fb_start;
+ fb_base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) + fb_base;
+ return amdgpu_gmc_sign_extend(offset);
+}
+
+/**
+ * amdgpu_bo_gpu_offset_no_check - return GPU offset of bo
+ * @bo: amdgpu object for which we query the offset
+ *
+ * Returns:
+ * current GPU offset of the object without raising warnings.
+ */
+u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t offset = AMDGPU_BO_INVALID_OFFSET;
+
+ if (bo->tbo.resource->mem_type == TTM_PL_TT)
+ offset = amdgpu_gmc_agp_addr(&bo->tbo);
+
+ if (offset == AMDGPU_BO_INVALID_OFFSET)
+ offset = (bo->tbo.resource->start << PAGE_SHIFT) +
+ amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type);
+
+ return amdgpu_gmc_sign_extend(offset);
+}
+
+/**
+ * amdgpu_bo_mem_stats_placement - bo placement for memory accounting
+ * @bo: the buffer object we should look at
+ *
+ * BO can have multiple preferred placements, to avoid double counting we want
+ * to file it under a single placement for memory stats.
+ * Luckily, if we take the highest set bit in preferred_domains the result is
+ * quite sensible.
+ *
+ * Returns:
+ * Which of the placements should the BO be accounted under.
+ */
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
+{
+ uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
+
+ if (!domain)
+ return TTM_PL_SYSTEM;
+
+ switch (rounddown_pow_of_two(domain)) {
+ case AMDGPU_GEM_DOMAIN_CPU:
+ return TTM_PL_SYSTEM;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ return TTM_PL_TT;
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ return TTM_PL_VRAM;
+ case AMDGPU_GEM_DOMAIN_GDS:
+ return AMDGPU_PL_GDS;
+ case AMDGPU_GEM_DOMAIN_GWS:
+ return AMDGPU_PL_GWS;
+ case AMDGPU_GEM_DOMAIN_OA:
+ return AMDGPU_PL_OA;
+ case AMDGPU_GEM_DOMAIN_DOORBELL:
+ return AMDGPU_PL_DOORBELL;
+ case AMDGPU_GEM_DOMAIN_MMIO_REMAP:
+ return AMDGPU_PL_MMIO_REMAP;
+ default:
+ return TTM_PL_SYSTEM;
+ }
+}
+
+/**
+ * amdgpu_bo_get_preferred_domain - get preferred domain
+ * @adev: amdgpu device object
+ * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
+ *
+ * Returns:
+ * Which of the allowed domains is preferred for allocating the BO.
+ */
+uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
+ uint32_t domain)
+{
+ if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) &&
+ ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) {
+ domain = AMDGPU_GEM_DOMAIN_VRAM;
+ if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ }
+ return domain;
+}
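+
+/*
+ * Sketch: with (VRAM | GTT) requested, CARRIZO/STONEY parts with little VRAM
+ * are steered to a single domain, e.g.:
+ *
+ *   domain = amdgpu_bo_get_preferred_domain(adev,
+ *                   AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
+ */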
+
+#if defined(CONFIG_DEBUG_FS)
+#define amdgpu_bo_print_flag(m, bo, flag) \
+ do { \
+ if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \
+ seq_printf((m), " " #flag); \
+ } \
+ } while (0)
+
+/**
+ * amdgpu_bo_print_info - print BO info in debugfs file
+ *
+ * @id: Index or Id of the BO
+ * @bo: Requested BO for printing info
+ * @m: debugfs file
+ *
+ * Print BO information in debugfs file
+ *
+ * Returns:
+ * Size of the BO in bytes.
+ */
+u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct dma_buf_attachment *attachment;
+ struct dma_buf *dma_buf;
+ const char *placement;
+ unsigned int pin_count;
+ u64 size;
+
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ if (!bo->tbo.resource) {
+ placement = "NONE";
+ } else {
+ switch (bo->tbo.resource->mem_type) {
+ case TTM_PL_VRAM:
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
+ placement = "VRAM VISIBLE";
+ else
+ placement = "VRAM";
+ break;
+ case TTM_PL_TT:
+ placement = "GTT";
+ break;
+ case AMDGPU_PL_GDS:
+ placement = "GDS";
+ break;
+ case AMDGPU_PL_GWS:
+ placement = "GWS";
+ break;
+ case AMDGPU_PL_OA:
+ placement = "OA";
+ break;
+ case AMDGPU_PL_PREEMPT:
+ placement = "PREEMPTIBLE";
+ break;
+ case AMDGPU_PL_DOORBELL:
+ placement = "DOORBELL";
+ break;
+ case AMDGPU_PL_MMIO_REMAP:
+ placement = "MMIO REMAP";
+ break;
+ case TTM_PL_SYSTEM:
+ default:
+ placement = "CPU";
+ break;
+ }
+ }
+ dma_resv_unlock(bo->tbo.base.resv);
+ } else {
+ placement = "UNKNOWN";
+ }
+
+ size = amdgpu_bo_size(bo);
+ seq_printf(m, "\t\t0x%08x: %12lld byte %s",
+ id, size, placement);
+
+ pin_count = READ_ONCE(bo->tbo.pin_count);
+ if (pin_count)
+ seq_printf(m, " pin count %d", pin_count);
+
+ dma_buf = READ_ONCE(bo->tbo.base.dma_buf);
+ attachment = READ_ONCE(bo->tbo.base.import_attach);
+
+ if (attachment)
+ seq_printf(m, " imported from ino:%lu", file_inode(dma_buf->file)->i_ino);
+ else if (dma_buf)
+ seq_printf(m, " exported as ino:%lu", file_inode(dma_buf->file)->i_ino);
+
+ amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
+ amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
+ amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
+ amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
+ amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
+ amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
+ amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
+ /* Add the gem obj resv fence dump*/
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ dma_resv_describe(bo->tbo.base.resv, m);
+ dma_resv_unlock(bo->tbo.base.resv);
+ }
+ seq_puts(m, "\n");
+
+ return size;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
new file mode 100644
index 000000000000..52c2d1731aab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+#ifndef __AMDGPU_OBJECT_H__
+#define __AMDGPU_OBJECT_H__
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_res_cursor.h"
+
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
+
+#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
+#define AMDGPU_BO_MAX_PLACEMENTS 3
+
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63)
+
+#define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
+#define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo)
+
+struct amdgpu_bo_param {
+ unsigned long size;
+ int byte_align;
+ u32 bo_ptr_size;
+ u32 domain;
+ u32 preferred_domain;
+ u64 flags;
+ enum ttm_bo_type type;
+ bool no_wait_gpu;
+ struct dma_resv *resv;
+ void (*destroy)(struct ttm_buffer_object *bo);
+ /* xcp partition number plus 1, 0 means any partition */
+ int8_t xcp_id_plus1;
+};
+
+/* bo virtual addresses in a vm */
+struct amdgpu_bo_va_mapping {
+ struct amdgpu_bo_va *bo_va;
+ struct list_head list;
+ struct rb_node rb;
+ uint64_t start;
+ uint64_t last;
+ uint64_t __subtree_last;
+ uint64_t offset;
+ uint32_t flags;
+};
+
+/* User space allocated BO in a VM */
+struct amdgpu_bo_va {
+ struct amdgpu_vm_bo_base base;
+
+ /* protected by bo being reserved */
+ unsigned ref_count;
+
+ /* all other members protected by the VM PD being reserved */
+ struct dma_fence *last_pt_update;
+
+ /* mappings for this bo_va */
+ struct list_head invalids;
+ struct list_head valids;
+
+ /* If the mappings are cleared or filled */
+ bool cleared;
+
+ bool is_xgmi;
+
+ /*
+ * protected by vm reservation lock
+ * if non-zero, cannot unmap from GPU because user queues may still access it
+ */
+ unsigned int queue_refcount;
+ atomic_t userq_va_mapped;
+};
+
+struct amdgpu_bo {
+ /* Protected by tbo.reserved */
+ u32 preferred_domains;
+ u32 allowed_domains;
+ struct ttm_place placements[AMDGPU_BO_MAX_PLACEMENTS];
+ struct ttm_placement placement;
+ struct ttm_buffer_object tbo;
+ struct ttm_bo_kmap_obj kmap;
+ u64 flags;
+ /* per VM structure for page tables and with virtual addresses */
+ struct amdgpu_vm_bo_base *vm_bo;
+ /* Constant after initialization */
+ struct amdgpu_bo *parent;
+
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
+ struct kgd_mem *kfd_bo;
+
+ /*
+ * For GPUs with spatial partitioning, xcp partition number, -1 means
+ * any partition. For other ASICs without spatial partition, always 0
+ * for memory accounting.
+ */
+ int8_t xcp_id;
+};
+
+struct amdgpu_bo_user {
+ struct amdgpu_bo bo;
+ u64 tiling_flags;
+ u64 metadata_flags;
+ void *metadata;
+ u32 metadata_size;
+
+};
+
+struct amdgpu_bo_vm {
+ struct amdgpu_bo bo;
+ struct amdgpu_vm_bo_base entries[];
+};
+
+static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
+{
+ return container_of(tbo, struct amdgpu_bo, tbo);
+}
+
+/**
+ * amdgpu_mem_type_to_domain - return domain corresponding to mem_type
+ * @mem_type: ttm memory type
+ *
+ * Returns corresponding domain of the ttm mem_type
+ */
+static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
+{
+ switch (mem_type) {
+ case TTM_PL_VRAM:
+ return AMDGPU_GEM_DOMAIN_VRAM;
+ case TTM_PL_TT:
+ return AMDGPU_GEM_DOMAIN_GTT;
+ case TTM_PL_SYSTEM:
+ return AMDGPU_GEM_DOMAIN_CPU;
+ case AMDGPU_PL_GDS:
+ return AMDGPU_GEM_DOMAIN_GDS;
+ case AMDGPU_PL_GWS:
+ return AMDGPU_GEM_DOMAIN_GWS;
+ case AMDGPU_PL_OA:
+ return AMDGPU_GEM_DOMAIN_OA;
+ case AMDGPU_PL_DOORBELL:
+ return AMDGPU_GEM_DOMAIN_DOORBELL;
+ case AMDGPU_PL_MMIO_REMAP:
+ return AMDGPU_GEM_DOMAIN_MMIO_REMAP;
+ default:
+ break;
+ }
+ return 0;
+}
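As an illustration, a caller that already holds the reservation can map the BO's current TTM placement back to a GEM domain; a minimal sketch, assuming bo->tbo.resource is non-NULL:

	u32 domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);

	if (domain == AMDGPU_GEM_DOMAIN_VRAM)
		/* the BO currently lives in VRAM */;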
+
+/**
+ * amdgpu_bo_reserve - reserve bo
+ * @bo: bo structure
+ * @no_intr: don't return -ERESTARTSYS on pending signal
+ *
+ * Returns:
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ int r;
+
+ r = ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ dev_err(adev->dev, "%p reserve failed\n", bo);
+ return r;
+ }
+ return 0;
+}
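A typical caller pairs this with amdgpu_bo_unreserve() around any access to reservation-protected BO state; a minimal sketch, assuming bo has already been created and using the interruptible (no_intr == false) form:

	int r;

	r = amdgpu_bo_reserve(bo, false);
	if (unlikely(r))	/* may be -ERESTARTSYS if a signal is pending */
		return r;
	/* ... touch reservation-protected BO state here ... */
	amdgpu_bo_unreserve(bo);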
+
+static inline void amdgpu_bo_unreserve(struct amdgpu_bo *bo)
+{
+ ttm_bo_unreserve(&bo->tbo);
+}
+
+static inline unsigned long amdgpu_bo_size(struct amdgpu_bo *bo)
+{
+ return bo->tbo.base.size;
+}
+
+static inline unsigned amdgpu_bo_ngpu_pages(struct amdgpu_bo *bo)
+{
+ return bo->tbo.base.size / AMDGPU_GPU_PAGE_SIZE;
+}
+
+static inline unsigned amdgpu_bo_gpu_page_alignment(struct amdgpu_bo *bo)
+{
+ return (bo->tbo.page_alignment << PAGE_SHIFT) / AMDGPU_GPU_PAGE_SIZE;
+}
+
+/**
+ * amdgpu_bo_mmap_offset - return mmap offset of bo
+ * @bo: amdgpu object for which we query the offset
+ *
+ * Returns mmap offset of the object.
+ */
+static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
+{
+ return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
+}
+
+/**
+ * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
+ */
+static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
+{
+ return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+}
+
+/**
+ * amdgpu_bo_encrypted - test if the BO is encrypted
+ * @bo: pointer to a buffer object
+ *
+ * Return true if the buffer object is encrypted, false otherwise.
+ */
+static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo)
+{
+ return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED;
+}
+
+bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
+
+int amdgpu_bo_create(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo **bo_ptr);
+int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dbuf, u32 domain,
+ struct amdgpu_bo **bo,
+ u64 *gpu_addr);
+int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size,
+ struct amdgpu_bo **bo_ptr, void **cpu_addr);
+int amdgpu_bo_create_user(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_user **ubo_ptr);
+int amdgpu_bo_create_vm(struct amdgpu_device *adev,
+ struct amdgpu_bo_param *bp,
+ struct amdgpu_bo_vm **ubo_ptr);
+void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
+ void **cpu_addr);
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo);
+int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
+void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
+void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
+struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
+void amdgpu_bo_unref(struct amdgpu_bo **bo);
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
+void amdgpu_bo_unpin(struct amdgpu_bo *bo);
+int amdgpu_bo_init(struct amdgpu_device *adev);
+void amdgpu_bo_fini(struct amdgpu_device *adev);
+int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags);
+void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags);
+int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
+ uint32_t metadata_size, uint64_t flags);
+int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint64_t *flags);
+void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
+ bool evict,
+ struct ttm_resource *new_mem);
+void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
+vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
+void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
+ bool shared);
+int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
+ enum amdgpu_sync_mode sync_mode, void *owner,
+ bool intr);
+int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
+u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
+u64 amdgpu_bo_fb_aper_addr(struct amdgpu_bo *bo);
+u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
+uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
+ uint32_t domain);
+
+/*
+ * sub allocation
+ */
+static inline struct amdgpu_sa_manager *
+to_amdgpu_sa_manager(struct drm_suballoc_manager *manager)
+{
+ return container_of(manager, struct amdgpu_sa_manager, base);
+}
+
+static inline uint64_t amdgpu_sa_bo_gpu_addr(struct drm_suballoc *sa_bo)
+{
+ return to_amdgpu_sa_manager(sa_bo->manager)->gpu_addr +
+ drm_suballoc_soffset(sa_bo);
+}
+
+static inline void *amdgpu_sa_bo_cpu_addr(struct drm_suballoc *sa_bo)
+{
+ return to_amdgpu_sa_manager(sa_bo->manager)->cpu_ptr +
+ drm_suballoc_soffset(sa_bo);
+}
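As a usage sketch (assuming sa_bo was obtained from amdgpu_sa_bo_new() on an initialized manager), the two inline helpers translate a sub-allocation into its GPU and CPU addresses:

	uint64_t gpu_addr = amdgpu_sa_bo_gpu_addr(sa_bo);
	void *cpu_ptr = amdgpu_sa_bo_cpu_addr(sa_bo);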
+
+int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager,
+ unsigned size, u32 align, u32 domain);
+void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager);
+int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager);
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size);
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo,
+ struct dma_fence *fence);
+#if defined(CONFIG_DEBUG_FS)
+void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
+ struct seq_file *m);
+u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m);
+#endif
+void amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
+
+bool amdgpu_bo_support_uswc(u64 bo_flags);
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
new file mode 100644
index 000000000000..675aa138ea11
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include <asm/div64.h>
+#include <linux/gcd.h>
+
+/**
+ * amdgpu_pll_reduce_ratio - fractional number reduction
+ *
+ * @nom: numerator
+ * @den: denominator
+ * @nom_min: minimum value for the numerator
+ * @den_min: minimum value for the denominator
+ *
+ * Find the greatest common divisor and apply it to both numerator and
+ * denominator, but make sure the numerator and denominator are at least
+ * as large as their minimum values.
+ */
+static void amdgpu_pll_reduce_ratio(unsigned *nom, unsigned *den,
+ unsigned nom_min, unsigned den_min)
+{
+ unsigned tmp;
+
+ /* reduce the numbers to a simpler ratio */
+ tmp = gcd(*nom, *den);
+ *nom /= tmp;
+ *den /= tmp;
+
+ /* make sure the numerator is large enough */
+ if (*nom < nom_min) {
+ tmp = DIV_ROUND_UP(nom_min, *nom);
+ *nom *= tmp;
+ *den *= tmp;
+ }
+
+ /* make sure the denominator is large enough */
+ if (*den < den_min) {
+ tmp = DIV_ROUND_UP(den_min, *den);
+ *nom *= tmp;
+ *den *= tmp;
+ }
+}
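A worked example of the reduction (hypothetical values): called with nom=270000, den=27000, nom_min=4 and den_min=4, the gcd of 27000 first reduces the ratio to 10/1; the denominator is then below den_min, so both values are scaled by DIV_ROUND_UP(4, 1) = 4, giving nom=40 and den=4 while preserving the 10:1 ratio.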
+
+/**
+ * amdgpu_pll_get_fb_ref_div - feedback and ref divider calculation
+ *
+ * @adev: amdgpu_device pointer
+ * @nom: numerator
+ * @den: denominator
+ * @post_div: post divider
+ * @fb_div_max: feedback divider maximum
+ * @ref_div_max: reference divider maximum
+ * @fb_div: resulting feedback divider
+ * @ref_div: resulting reference divider
+ *
+ * Calculate feedback and reference divider for a given post divider. Makes
+ * sure we stay within the limits.
+ */
+static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int nom,
+ unsigned int den, unsigned int post_div,
+ unsigned int fb_div_max, unsigned int ref_div_max,
+ unsigned int *fb_div, unsigned int *ref_div)
+{
+
+ /* limit reference * post divider to a maximum */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ ref_div_max = min(100 / post_div, ref_div_max);
+ else
+ ref_div_max = min(128 / post_div, ref_div_max);
+
+ /* get matching reference and feedback divider */
+ *ref_div = clamp(DIV_ROUND_CLOSEST(den, post_div), 1u, ref_div_max);
+ *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
+
+ /* limit fb divider to its maximum */
+ if (*fb_div > fb_div_max) {
+ *ref_div = DIV_ROUND_CLOSEST(*ref_div * fb_div_max, *fb_div);
+ *fb_div = fb_div_max;
+ }
+}
+
+/**
+ * amdgpu_pll_compute - compute PLL parameters
+ *
+ * @adev: amdgpu_device pointer
+ * @pll: information about the PLL
+ * @freq: requested frequency
+ * @dot_clock_p: resulting pixel clock
+ * @fb_div_p: resulting feedback divider
+ * @frac_fb_div_p: fractional part of the feedback divider
+ * @ref_div_p: resulting reference divider
+ * @post_div_p: resulting post divider
+ *
+ * Try to calculate the PLL parameters to generate the given frequency:
+ * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div)
+ */
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+ struct amdgpu_pll *pll,
+ u32 freq,
+ u32 *dot_clock_p,
+ u32 *fb_div_p,
+ u32 *frac_fb_div_p,
+ u32 *ref_div_p,
+ u32 *post_div_p)
+{
+ unsigned target_clock = pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV ?
+ freq : freq / 10;
+
+ unsigned fb_div_min, fb_div_max, fb_div;
+ unsigned post_div_min, post_div_max, post_div;
+ unsigned ref_div_min, ref_div_max, ref_div;
+ unsigned post_div_best, diff_best;
+ unsigned nom, den;
+
+ /* determine allowed feedback divider range */
+ fb_div_min = pll->min_feedback_div;
+ fb_div_max = pll->max_feedback_div;
+
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV) {
+ fb_div_min *= 10;
+ fb_div_max *= 10;
+ }
+
+ /* determine allowed ref divider range */
+ if (pll->flags & AMDGPU_PLL_USE_REF_DIV)
+ ref_div_min = pll->reference_div;
+ else
+ ref_div_min = pll->min_ref_div;
+
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV &&
+ pll->flags & AMDGPU_PLL_USE_REF_DIV)
+ ref_div_max = pll->reference_div;
+ else
+ ref_div_max = pll->max_ref_div;
+
+ /* determine allowed post divider range */
+ if (pll->flags & AMDGPU_PLL_USE_POST_DIV) {
+ post_div_min = pll->post_div;
+ post_div_max = pll->post_div;
+ } else {
+ unsigned vco_min, vco_max;
+
+ if (pll->flags & AMDGPU_PLL_IS_LCD) {
+ vco_min = pll->lcd_pll_out_min;
+ vco_max = pll->lcd_pll_out_max;
+ } else {
+ vco_min = pll->pll_out_min;
+ vco_max = pll->pll_out_max;
+ }
+
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV) {
+ vco_min *= 10;
+ vco_max *= 10;
+ }
+
+ post_div_min = vco_min / target_clock;
+ if ((target_clock * post_div_min) < vco_min)
+ ++post_div_min;
+ if (post_div_min < pll->min_post_div)
+ post_div_min = pll->min_post_div;
+
+ post_div_max = vco_max / target_clock;
+ if ((target_clock * post_div_max) > vco_max)
+ --post_div_max;
+ if (post_div_max > pll->max_post_div)
+ post_div_max = pll->max_post_div;
+ }
+
+ /* represent the searched ratio as fractional number */
+ nom = target_clock;
+ den = pll->reference_freq;
+
+ /* reduce the numbers to a simpler ratio */
+ amdgpu_pll_reduce_ratio(&nom, &den, fb_div_min, post_div_min);
+
+ /* now search for a post divider */
+ if (pll->flags & AMDGPU_PLL_PREFER_MINM_OVER_MAXP)
+ post_div_best = post_div_min;
+ else
+ post_div_best = post_div_max;
+ diff_best = ~0;
+
+ for (post_div = post_div_min; post_div <= post_div_max; ++post_div) {
+ unsigned diff;
+ amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max,
+ ref_div_max, &fb_div, &ref_div);
+ diff = abs(target_clock - (pll->reference_freq * fb_div) /
+ (ref_div * post_div));
+
+ if (diff < diff_best || (diff == diff_best &&
+ !(pll->flags & AMDGPU_PLL_PREFER_MINM_OVER_MAXP))) {
+
+ post_div_best = post_div;
+ diff_best = diff;
+ }
+ }
+ post_div = post_div_best;
+
+ /* get the feedback and reference divider for the optimal value */
+ amdgpu_pll_get_fb_ref_div(adev, nom, den, post_div, fb_div_max, ref_div_max,
+ &fb_div, &ref_div);
+
+ /* reduce the numbers to a simpler ratio once more */
+ /* this also makes sure that the reference divider is large enough */
+ amdgpu_pll_reduce_ratio(&fb_div, &ref_div, fb_div_min, ref_div_min);
+
+ /* avoid high jitter with small fractional dividers */
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV && (fb_div % 10)) {
+ fb_div_min = max(fb_div_min, (9 - (fb_div % 10)) * 20 + 60);
+ if (fb_div < fb_div_min) {
+ unsigned tmp = DIV_ROUND_UP(fb_div_min, fb_div);
+ fb_div *= tmp;
+ ref_div *= tmp;
+ }
+ }
+
+ /* and finally save the result */
+ if (pll->flags & AMDGPU_PLL_USE_FRAC_FB_DIV) {
+ *fb_div_p = fb_div / 10;
+ *frac_fb_div_p = fb_div % 10;
+ } else {
+ *fb_div_p = fb_div;
+ *frac_fb_div_p = 0;
+ }
+
+ *dot_clock_p = ((pll->reference_freq * *fb_div_p * 10) +
+ (pll->reference_freq * *frac_fb_div_p)) /
+ (ref_div * post_div * 10);
+ *ref_div_p = ref_div;
+ *post_div_p = post_div;
+
+ DRM_DEBUG_KMS("%d - %d, pll dividers - fb: %d.%d ref: %d, post %d\n",
+ freq, *dot_clock_p * 10, *fb_div_p, *frac_fb_div_p,
+ ref_div, post_div);
+}
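To make the target formula concrete, plugging illustrative divider values (not taken from any real ASIC) into dot_clock = (ref_freq * feedback_div) / (ref_div * post_div):

	/* e.g. ref_freq = 2700, fb_div = 120, ref_div = 3, post_div = 4 */
	/* dot_clock = (2700 * 120) / (3 * 4) = 324000 / 12 = 27000 */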
+
+/**
+ * amdgpu_pll_get_use_mask - look up a mask of which PPLLs are in use
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the mask of which PPLLs (Pixel PLLs) are in use.
+ */
+u32 amdgpu_pll_get_use_mask(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_crtc *test_crtc;
+ struct amdgpu_crtc *test_amdgpu_crtc;
+ u32 pll_in_use = 0;
+
+ list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) {
+ if (crtc == test_crtc)
+ continue;
+
+ test_amdgpu_crtc = to_amdgpu_crtc(test_crtc);
+ if (test_amdgpu_crtc->pll_id != ATOM_PPLL_INVALID)
+ pll_in_use |= (1 << test_amdgpu_crtc->pll_id);
+ }
+ return pll_in_use;
+}
+
+/**
+ * amdgpu_pll_get_shared_dp_ppll - return the PPLL used by another crtc for DP
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) used by another crtc/encoder which is
+ * also in DP mode. For DP, a single PPLL can be used for all DP
+ * crtcs/encoders.
+ */
+int amdgpu_pll_get_shared_dp_ppll(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_crtc *test_crtc;
+ struct amdgpu_crtc *test_amdgpu_crtc;
+
+ list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) {
+ if (crtc == test_crtc)
+ continue;
+ test_amdgpu_crtc = to_amdgpu_crtc(test_crtc);
+ if (test_amdgpu_crtc->encoder &&
+ ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(test_amdgpu_crtc->encoder))) {
+ /* for DP use the same PLL for all */
+ if (test_amdgpu_crtc->pll_id != ATOM_PPLL_INVALID)
+ return test_amdgpu_crtc->pll_id;
+ }
+ }
+ return ATOM_PPLL_INVALID;
+}
+
+/**
+ * amdgpu_pll_get_shared_nondp_ppll - return the PPLL used by another non-DP crtc
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) used by another non-DP crtc/encoder which can
+ * be shared (i.e., same clock).
+ */
+int amdgpu_pll_get_shared_nondp_ppll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_crtc *test_crtc;
+ struct amdgpu_crtc *test_amdgpu_crtc;
+ u32 adjusted_clock, test_adjusted_clock;
+
+ adjusted_clock = amdgpu_crtc->adjusted_clock;
+
+ if (adjusted_clock == 0)
+ return ATOM_PPLL_INVALID;
+
+ list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) {
+ if (crtc == test_crtc)
+ continue;
+ test_amdgpu_crtc = to_amdgpu_crtc(test_crtc);
+ if (test_amdgpu_crtc->encoder &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(test_amdgpu_crtc->encoder))) {
+ /* check if we are already driving this connector with another crtc */
+ if (test_amdgpu_crtc->connector == amdgpu_crtc->connector) {
+ /* if we are, return that pll */
+ if (test_amdgpu_crtc->pll_id != ATOM_PPLL_INVALID)
+ return test_amdgpu_crtc->pll_id;
+ }
+ /* for non-DP check the clock */
+ test_adjusted_clock = test_amdgpu_crtc->adjusted_clock;
+ if ((crtc->mode.clock == test_crtc->mode.clock) &&
+ (adjusted_clock == test_adjusted_clock) &&
+ (amdgpu_crtc->ss_enabled == test_amdgpu_crtc->ss_enabled) &&
+ (test_amdgpu_crtc->pll_id != ATOM_PPLL_INVALID))
+ return test_amdgpu_crtc->pll_id;
+ }
+ }
+ return ATOM_PPLL_INVALID;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h
new file mode 100644
index 000000000000..44a583d6c9b4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PLL_H__
+#define __AMDGPU_PLL_H__
+
+void amdgpu_pll_compute(struct amdgpu_device *adev,
+ struct amdgpu_pll *pll,
+ u32 freq,
+ u32 *dot_clock_p,
+ u32 *fb_div_p,
+ u32 *frac_fb_div_p,
+ u32 *ref_div_p,
+ u32 *post_div_p);
+u32 amdgpu_pll_get_use_mask(struct drm_crtc *crtc);
+int amdgpu_pll_get_shared_dp_ppll(struct drm_crtc *crtc);
+int amdgpu_pll_get_shared_nondp_ppll(struct drm_crtc *crtc);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
new file mode 100644
index 000000000000..6e91ea1de5aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -0,0 +1,680 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/perf_event.h>
+#include <linux/init.h>
+#include "amdgpu.h"
+#include "amdgpu_pmu.h"
+
+#define PMU_NAME_SIZE 32
+#define NUM_FORMATS_AMDGPU_PMU 4
+#define NUM_FORMATS_DF_VEGA20 3
+#define NUM_EVENTS_DF_VEGA20 8
+#define NUM_EVENT_TYPES_VEGA20 1
+#define NUM_EVENTS_VEGA20_XGMI 2
+#define NUM_EVENTS_VEGA20_MAX NUM_EVENTS_VEGA20_XGMI
+#define NUM_EVENT_TYPES_ARCTURUS 1
+#define NUM_EVENTS_ARCTURUS_XGMI 6
+#define NUM_EVENTS_ARCTURUS_MAX NUM_EVENTS_ARCTURUS_XGMI
+
+struct amdgpu_pmu_event_attribute {
+ struct device_attribute attr;
+ const char *event_str;
+ unsigned int type;
+};
+
+/* record to keep track of pmu entry per pmu type per device */
+struct amdgpu_pmu_entry {
+ struct list_head entry;
+ struct amdgpu_device *adev;
+ struct pmu pmu;
+ unsigned int pmu_perf_type;
+ char *pmu_type_name;
+ char *pmu_file_prefix;
+ struct attribute_group fmt_attr_group;
+ struct amdgpu_pmu_event_attribute *fmt_attr;
+ struct attribute_group evt_attr_group;
+ struct amdgpu_pmu_event_attribute *evt_attr;
+};
+
+static ssize_t amdgpu_pmu_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_pmu_event_attribute *amdgpu_pmu_attr;
+
+ amdgpu_pmu_attr = container_of(attr, struct amdgpu_pmu_event_attribute,
+ attr);
+
+ if (!amdgpu_pmu_attr->type)
+ return sprintf(buf, "%s\n", amdgpu_pmu_attr->event_str);
+
+ return sprintf(buf, "%s,type=0x%x\n",
+ amdgpu_pmu_attr->event_str, amdgpu_pmu_attr->type);
+}
+
+static LIST_HEAD(amdgpu_pmu_list);
+
+
+struct amdgpu_pmu_attr {
+ const char *name;
+ const char *config;
+};
+
+struct amdgpu_pmu_type {
+ const unsigned int type;
+ const unsigned int num_of_type;
+};
+
+struct amdgpu_pmu_config {
+ struct amdgpu_pmu_attr *formats;
+ unsigned int num_formats;
+ struct amdgpu_pmu_attr *events;
+ unsigned int num_events;
+ struct amdgpu_pmu_type *types;
+ unsigned int num_types;
+};
+
+/*
+ * Events fall under two categories:
+ * - PMU typed
+ * Events in /sys/bus/event_source/devices/amdgpu_<pmu_type>_<dev_num> have
+ * performance counter operations handled by one IP <pmu_type>. Formats and
+ * events should be defined by <pmu_type>_<asic_type>_formats and
+ * <pmu_type>_<asic_type>_events respectively.
+ *
+ * - Event config typed
+ * Events in /sys/bus/event_source/devices/amdgpu_<dev_num> have performance
+ * counter operations that can be handled by multiple IPs dictated by their
+ * "type" format field. Formats and events should be defined by
+ * amdgpu_pmu_formats and <asic_type>_events respectively. Format field
+ * "type" is generated in amdgpu_pmu_event_show and defined in
+ * <asic_type>_event_config_types.
+ */
+
+static struct amdgpu_pmu_attr amdgpu_pmu_formats[NUM_FORMATS_AMDGPU_PMU] = {
+ { .name = "event", .config = "config:0-7" },
+ { .name = "instance", .config = "config:8-15" },
+ { .name = "umask", .config = "config:16-23"},
+ { .name = "type", .config = "config:56-63"}
+};
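Given the bit ranges declared in amdgpu_pmu_formats, a raw perf config for the event-config-typed PMU packs the four fields into a single 64-bit value that user space passes as the event config; a hedged sketch (the helper name is made up, and the field values below are the Vega20 xgmi_link0_data_outbound encoding from this file, with type AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI == 2):

	#include <stdint.h>

	/* event: bits 0-7, instance: bits 8-15, umask: bits 16-23, type: bits 56-63 */
	static uint64_t amdgpu_pmu_raw_config(uint64_t event, uint64_t instance,
					      uint64_t umask, uint64_t type)
	{
		return (event & 0xff) | ((instance & 0xff) << 8) |
		       ((umask & 0xff) << 16) | ((type & 0xff) << 56);
	}

	/* xgmi_link0_data_outbound: event=0x7, instance=0x46, umask=0x2 */
	uint64_t cfg = amdgpu_pmu_raw_config(0x7, 0x46, 0x2, 2);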
+
+/* Vega20 events */
+static struct amdgpu_pmu_attr vega20_events[NUM_EVENTS_VEGA20_MAX] = {
+ { .name = "xgmi_link0_data_outbound",
+ .config = "event=0x7,instance=0x46,umask=0x2" },
+ { .name = "xgmi_link1_data_outbound",
+ .config = "event=0x7,instance=0x47,umask=0x2" }
+};
+
+static struct amdgpu_pmu_type vega20_types[NUM_EVENT_TYPES_VEGA20] = {
+ { .type = AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI,
+ .num_of_type = NUM_EVENTS_VEGA20_XGMI }
+};
+
+static struct amdgpu_pmu_config vega20_config = {
+ .formats = amdgpu_pmu_formats,
+ .num_formats = ARRAY_SIZE(amdgpu_pmu_formats),
+ .events = vega20_events,
+ .num_events = ARRAY_SIZE(vega20_events),
+ .types = vega20_types,
+ .num_types = ARRAY_SIZE(vega20_types)
+};
+
+/* Vega20 data fabric (DF) events */
+static struct amdgpu_pmu_attr df_vega20_formats[NUM_FORMATS_DF_VEGA20] = {
+ { .name = "event", .config = "config:0-7" },
+ { .name = "instance", .config = "config:8-15" },
+ { .name = "umask", .config = "config:16-23"}
+};
+
+static struct amdgpu_pmu_attr df_vega20_events[NUM_EVENTS_DF_VEGA20] = {
+ { .name = "cake0_pcsout_txdata",
+ .config = "event=0x7,instance=0x46,umask=0x2" },
+ { .name = "cake1_pcsout_txdata",
+ .config = "event=0x7,instance=0x47,umask=0x2" },
+ { .name = "cake0_pcsout_txmeta",
+ .config = "event=0x7,instance=0x46,umask=0x4" },
+ { .name = "cake1_pcsout_txmeta",
+ .config = "event=0x7,instance=0x47,umask=0x4" },
+ { .name = "cake0_ftiinstat_reqalloc",
+ .config = "event=0xb,instance=0x46,umask=0x4" },
+ { .name = "cake1_ftiinstat_reqalloc",
+ .config = "event=0xb,instance=0x47,umask=0x4" },
+ { .name = "cake0_ftiinstat_rspalloc",
+ .config = "event=0xb,instance=0x46,umask=0x8" },
+ { .name = "cake1_ftiinstat_rspalloc",
+ .config = "event=0xb,instance=0x47,umask=0x8" }
+};
+
+static struct amdgpu_pmu_config df_vega20_config = {
+ .formats = df_vega20_formats,
+ .num_formats = ARRAY_SIZE(df_vega20_formats),
+ .events = df_vega20_events,
+ .num_events = ARRAY_SIZE(df_vega20_events),
+ .types = NULL,
+ .num_types = 0
+};
+
+/* Arcturus events */
+static struct amdgpu_pmu_attr arcturus_events[NUM_EVENTS_ARCTURUS_MAX] = {
+ { .name = "xgmi_link0_data_outbound",
+ .config = "event=0x7,instance=0x4b,umask=0x2" },
+ { .name = "xgmi_link1_data_outbound",
+ .config = "event=0x7,instance=0x4c,umask=0x2" },
+ { .name = "xgmi_link2_data_outbound",
+ .config = "event=0x7,instance=0x4d,umask=0x2" },
+ { .name = "xgmi_link3_data_outbound",
+ .config = "event=0x7,instance=0x4e,umask=0x2" },
+ { .name = "xgmi_link4_data_outbound",
+ .config = "event=0x7,instance=0x4f,umask=0x2" },
+ { .name = "xgmi_link5_data_outbound",
+ .config = "event=0x7,instance=0x50,umask=0x2" }
+};
+
+static struct amdgpu_pmu_type arcturus_types[NUM_EVENT_TYPES_ARCTURUS] = {
+ { .type = AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI,
+ .num_of_type = NUM_EVENTS_ARCTURUS_XGMI }
+};
+
+static struct amdgpu_pmu_config arcturus_config = {
+ .formats = amdgpu_pmu_formats,
+ .num_formats = ARRAY_SIZE(amdgpu_pmu_formats),
+ .events = arcturus_events,
+ .num_events = ARRAY_SIZE(arcturus_events),
+ .types = arcturus_types,
+ .num_types = ARRAY_SIZE(arcturus_types)
+};
+
+/* initialize perf counter */
+static int amdgpu_perf_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* only handle events that belong to this PMU (required for PMU enumeration) */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* update the hw_perf_event struct with config data */
+ hwc->config = event->attr.config;
+ hwc->config_base = AMDGPU_PMU_PERF_TYPE_NONE;
+
+ return 0;
+}
+
+/* start perf counter */
+static void amdgpu_perf_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+ int target_cntr = 0;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_start))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ if (!(flags & PERF_EF_RELOAD)) {
+ target_cntr = pe->adev->df.funcs->pmc_start(pe->adev,
+ hwc->config, 0 /* unused */,
+ 1 /* add counter */);
+ if (target_cntr < 0)
+ break;
+
+ hwc->idx = target_cntr;
+ }
+
+ pe->adev->df.funcs->pmc_start(pe->adev, hwc->config,
+ hwc->idx, 0);
+ break;
+ default:
+ break;
+ }
+
+ perf_event_update_userpage(event);
+}
+
+/* read perf counter */
+static void amdgpu_perf_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+ u64 count, prev;
+
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_get_count))
+ return;
+
+ prev = local64_read(&hwc->prev_count);
+ do {
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ pe->adev->df.funcs->pmc_get_count(pe->adev,
+ hwc->config, hwc->idx, &count);
+ break;
+ default:
+ count = 0;
+ break;
+ }
+ } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, count));
+
+ local64_add(count - prev, &event->count);
+}
+
+/* stop perf counter */
+static void amdgpu_perf_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_stop))
+ return;
+
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc->idx,
+ 0);
+ break;
+ default:
+ break;
+ }
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ amdgpu_perf_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+/* add perf counter */
+static int amdgpu_perf_add(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int retval = 0, target_cntr;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_start))
+ return -EINVAL;
+
+ switch (pe->pmu_perf_type) {
+ case AMDGPU_PMU_PERF_TYPE_DF:
+ hwc->config_base = AMDGPU_PMU_EVENT_CONFIG_TYPE_DF;
+ break;
+ case AMDGPU_PMU_PERF_TYPE_ALL:
+ hwc->config_base = (hwc->config >>
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT) &
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK;
+ break;
+ }
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ target_cntr = pe->adev->df.funcs->pmc_start(pe->adev,
+ hwc->config, 0 /* unused */,
+ 1 /* add counter */);
+ if (target_cntr < 0)
+ retval = target_cntr;
+ else
+ hwc->idx = target_cntr;
+
+ break;
+ default:
+ return 0;
+ }
+
+ if (retval)
+ return retval;
+
+ if (flags & PERF_EF_START)
+ amdgpu_perf_start(event, PERF_EF_RELOAD);
+
+ return retval;
+}
+
+/* delete perf counter */
+static void amdgpu_perf_del(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+ if ((!pe->adev->df.funcs) ||
+ (!pe->adev->df.funcs->pmc_stop))
+ return;
+
+ amdgpu_perf_stop(event, PERF_EF_UPDATE);
+
+ switch (hwc->config_base) {
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF:
+ case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI:
+ pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc->idx,
+ 1);
+ break;
+ default:
+ break;
+ }
+
+ perf_event_update_userpage(event);
+}
+
+static void amdgpu_pmu_create_event_attrs_by_type(
+ struct attribute_group *attr_group,
+ struct amdgpu_pmu_event_attribute *pmu_attr,
+ struct amdgpu_pmu_attr events[],
+ int s_offset,
+ int e_offset,
+ unsigned int type)
+{
+ int i;
+
+ pmu_attr += s_offset;
+
+ for (i = s_offset; i < e_offset; i++) {
+ attr_group->attrs[i] = &pmu_attr->attr.attr;
+ sysfs_attr_init(&pmu_attr->attr.attr);
+ pmu_attr->attr.attr.name = events[i].name;
+ pmu_attr->attr.attr.mode = 0444;
+ pmu_attr->attr.show = amdgpu_pmu_event_show;
+ pmu_attr->event_str = events[i].config;
+ pmu_attr->type = type;
+ pmu_attr++;
+ }
+}
+
+static void amdgpu_pmu_create_attrs(struct attribute_group *attr_group,
+ struct amdgpu_pmu_event_attribute *pmu_attr,
+ struct amdgpu_pmu_attr events[],
+ int num_events)
+{
+ amdgpu_pmu_create_event_attrs_by_type(attr_group, pmu_attr, events, 0,
+ num_events, AMDGPU_PMU_EVENT_CONFIG_TYPE_NONE);
+}
+
+
+static int amdgpu_pmu_alloc_pmu_attrs(
+ struct attribute_group *fmt_attr_group,
+ struct amdgpu_pmu_event_attribute **fmt_attr,
+ struct attribute_group *evt_attr_group,
+ struct amdgpu_pmu_event_attribute **evt_attr,
+ struct amdgpu_pmu_config *config)
+{
+ *fmt_attr = kcalloc(config->num_formats, sizeof(**fmt_attr),
+ GFP_KERNEL);
+
+ if (!(*fmt_attr))
+ return -ENOMEM;
+
+ fmt_attr_group->attrs = kcalloc(config->num_formats + 1,
+ sizeof(*fmt_attr_group->attrs), GFP_KERNEL);
+
+ if (!fmt_attr_group->attrs)
+ goto err_fmt_attr_grp;
+
+ *evt_attr = kcalloc(config->num_events, sizeof(**evt_attr), GFP_KERNEL);
+
+ if (!(*evt_attr))
+ goto err_evt_attr;
+
+ evt_attr_group->attrs = kcalloc(config->num_events + 1,
+ sizeof(*evt_attr_group->attrs), GFP_KERNEL);
+
+ if (!evt_attr_group->attrs)
+ goto err_evt_attr_grp;
+
+ return 0;
+err_evt_attr_grp:
+ kfree(*evt_attr);
+err_evt_attr:
+ kfree(fmt_attr_group->attrs);
+err_fmt_attr_grp:
+ kfree(*fmt_attr);
+ return -ENOMEM;
+}
+
+/* init pmu tracking per pmu type */
+static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry,
+ struct amdgpu_pmu_config *config)
+{
+ const struct attribute_group *attr_groups[] = {
+ &pmu_entry->fmt_attr_group,
+ &pmu_entry->evt_attr_group,
+ NULL
+ };
+ char pmu_name[PMU_NAME_SIZE];
+ int ret = 0, total_num_events = 0;
+
+ pmu_entry->pmu = (struct pmu){
+ .event_init = amdgpu_perf_event_init,
+ .add = amdgpu_perf_add,
+ .del = amdgpu_perf_del,
+ .start = amdgpu_perf_start,
+ .stop = amdgpu_perf_stop,
+ .read = amdgpu_perf_read,
+ .task_ctx_nr = perf_invalid_context,
+ };
+
+ ret = amdgpu_pmu_alloc_pmu_attrs(&pmu_entry->fmt_attr_group,
+ &pmu_entry->fmt_attr,
+ &pmu_entry->evt_attr_group,
+ &pmu_entry->evt_attr,
+ config);
+
+ if (ret)
+ goto err_out;
+
+ amdgpu_pmu_create_attrs(&pmu_entry->fmt_attr_group, pmu_entry->fmt_attr,
+ config->formats, config->num_formats);
+
+ if (pmu_entry->pmu_perf_type == AMDGPU_PMU_PERF_TYPE_ALL) {
+ int i;
+
+ for (i = 0; i < config->num_types; i++) {
+ amdgpu_pmu_create_event_attrs_by_type(
+ &pmu_entry->evt_attr_group,
+ pmu_entry->evt_attr,
+ config->events,
+ total_num_events,
+ total_num_events +
+ config->types[i].num_of_type,
+ config->types[i].type);
+ total_num_events += config->types[i].num_of_type;
+ }
+ } else {
+ amdgpu_pmu_create_attrs(&pmu_entry->evt_attr_group,
+ pmu_entry->evt_attr,
+ config->events, config->num_events);
+ total_num_events = config->num_events;
+ }
+
+ pmu_entry->pmu.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+ GFP_KERNEL);
+
+ if (!pmu_entry->pmu.attr_groups) {
+ ret = -ENOMEM;
+ goto err_attr_group;
+ }
+
+ snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", pmu_entry->pmu_file_prefix,
+ adev_to_drm(pmu_entry->adev)->primary->index);
+
+ ret = perf_pmu_register(&pmu_entry->pmu, pmu_name, -1);
+
+ if (ret)
+ goto err_register;
+
+ if (pmu_entry->pmu_perf_type != AMDGPU_PMU_PERF_TYPE_ALL)
+ pr_info("Detected AMDGPU %s Counters. # of Counters = %d.\n",
+ pmu_entry->pmu_type_name, total_num_events);
+ else
+ pr_info("Detected AMDGPU %d Perf Events.\n", total_num_events);
+
+
+ list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list);
+
+ return 0;
+err_register:
+ kfree(pmu_entry->pmu.attr_groups);
+err_attr_group:
+ kfree(pmu_entry->fmt_attr_group.attrs);
+ kfree(pmu_entry->fmt_attr);
+ kfree(pmu_entry->evt_attr_group.attrs);
+ kfree(pmu_entry->evt_attr);
+err_out:
+ pr_warn("Error initializing AMDGPU %s PMUs.\n",
+ pmu_entry->pmu_type_name);
+ return ret;
+}
+
+/* destroy all pmu data associated with target device */
+void amdgpu_pmu_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_pmu_entry *pe, *temp;
+
+ list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) {
+ if (pe->adev != adev)
+ continue;
+ list_del(&pe->entry);
+ perf_pmu_unregister(&pe->pmu);
+ kfree(pe->pmu.attr_groups);
+ kfree(pe->fmt_attr_group.attrs);
+ kfree(pe->fmt_attr);
+ kfree(pe->evt_attr_group.attrs);
+ kfree(pe->evt_attr);
+ kfree(pe);
+ }
+}
+
+static struct amdgpu_pmu_entry *create_pmu_entry(struct amdgpu_device *adev,
+ unsigned int pmu_type,
+ char *pmu_type_name,
+ char *pmu_file_prefix)
+{
+ struct amdgpu_pmu_entry *pmu_entry;
+
+ pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL);
+
+ if (!pmu_entry)
+ return pmu_entry;
+
+ pmu_entry->adev = adev;
+ pmu_entry->fmt_attr_group.name = "format";
+ pmu_entry->fmt_attr_group.attrs = NULL;
+ pmu_entry->evt_attr_group.name = "events";
+ pmu_entry->evt_attr_group.attrs = NULL;
+ pmu_entry->pmu_perf_type = pmu_type;
+ pmu_entry->pmu_type_name = pmu_type_name;
+ pmu_entry->pmu_file_prefix = pmu_file_prefix;
+
+ return pmu_entry;
+}
+
+/* init amdgpu_pmu */
+int amdgpu_pmu_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+ struct amdgpu_pmu_entry *pmu_entry, *pmu_entry_df;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ pmu_entry_df = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_DF,
+ "DF", "amdgpu_df");
+
+ if (!pmu_entry_df)
+ return -ENOMEM;
+
+ ret = init_pmu_entry_by_type_and_add(pmu_entry_df,
+ &df_vega20_config);
+
+ if (ret) {
+ kfree(pmu_entry_df);
+ return ret;
+ }
+
+ pmu_entry = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_ALL,
+ "", "amdgpu");
+
+ if (!pmu_entry) {
+ amdgpu_pmu_fini(adev);
+ return -ENOMEM;
+ }
+
+ ret = init_pmu_entry_by_type_and_add(pmu_entry,
+ &vega20_config);
+
+ if (ret) {
+ kfree(pmu_entry);
+ amdgpu_pmu_fini(adev);
+ return ret;
+ }
+
+ break;
+ case CHIP_ARCTURUS:
+ pmu_entry = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_ALL,
+ "", "amdgpu");
+ if (!pmu_entry)
+ return -ENOMEM;
+
+ ret = init_pmu_entry_by_type_and_add(pmu_entry,
+ &arcturus_config);
+
+ if (ret) {
+ kfree(pmu_entry);
+ return -ENOMEM;
+ }
+
+ break;
+
+ default:
+ return 0;
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h
new file mode 100644
index 000000000000..6882dc48c5d8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_PMU_H_
+#define _AMDGPU_PMU_H_
+
+/* PMU types. */
+enum amdgpu_pmu_perf_type {
+ AMDGPU_PMU_PERF_TYPE_NONE = 0,
+ AMDGPU_PMU_PERF_TYPE_DF,
+ AMDGPU_PMU_PERF_TYPE_ALL
+};
+
+/*
+ * PMU type AMDGPU_PMU_PERF_TYPE_ALL can hold events of different "type"
+ * configurations. Event config types are parsed from the 64-bit raw
+ * config (See EVENT_CONFIG_TYPE_SHIFT and EVENT_CONFIG_TYPE_MASK) and
+ * are registered into the HW perf events config_base.
+ *
+ * PMU types with only a single event configuration type
+ * (non-AMDGPU_PMU_PERF_TYPE_ALL) have their event config type auto generated
+ * when the performance counter is added.
+ */
+enum amdgpu_pmu_event_config_type {
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_NONE = 0,
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_DF,
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI,
+ AMDGPU_PMU_EVENT_CONFIG_TYPE_MAX
+};
+
+#define AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT 56
+#define AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK 0xff
+
+int amdgpu_pmu_init(struct amdgpu_device *adev);
+void amdgpu_pmu_fini(struct amdgpu_device *adev);
+
+#endif /* _AMDGPU_PMU_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
new file mode 100644
index 000000000000..34b5e22b44e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2016-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König, Felix Kuehling
+ */
+
+#include "amdgpu.h"
+
+/**
+ * DOC: mem_info_preempt_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting the current total
+ * amount of used preemptible memory.
+ * The file mem_info_preempt_used is used for this and returns the currently
+ * used size of the preemptible block, in bytes.
+ */
+static ssize_t mem_info_preempt_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
+}
+
+static DEVICE_ATTR_RO(mem_info_preempt_used);
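User space can read the value directly from sysfs; a minimal sketch, assuming the usual /sys/class/drm/card0/device/ location of the amdgpu device attributes:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long used;
		FILE *f = fopen("/sys/class/drm/card0/device/mem_info_preempt_used", "r");

		if (!f)
			return 1;
		if (fscanf(f, "%llu", &used) == 1)
			printf("preemptible memory used: %llu bytes\n", used);
		fclose(f);
		return 0;
	}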
+
+/**
+ * amdgpu_preempt_mgr_new - allocate a new node
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: TTM memory object
+ *
+ * Dummy allocator that only accounts the space used; no real backing range
+ * is allocated and no limit is enforced.
+ */
+static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ *res = kzalloc(sizeof(**res), GFP_KERNEL);
+ if (!*res)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, *res);
+ (*res)->start = AMDGPU_BO_INVALID_OFFSET;
+ return 0;
+}
+
+/**
+ * amdgpu_preempt_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @res: TTM memory object
+ *
+ * Free the allocated resource again.
+ */
+static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ ttm_resource_fini(man, res);
+ kfree(res);
+}
+
+static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = {
+ .alloc = amdgpu_preempt_mgr_new,
+ .free = amdgpu_preempt_mgr_del,
+};
+
+/**
+ * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the preemptible memory (PREEMPT) manager.
+ */
+int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
+{
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+ int ret;
+
+ man->use_tt = true;
+ man->func = &amdgpu_preempt_mgr_func;
+
+ ttm_resource_manager_init(man, &adev->mman.bdev, (1 << 30));
+
+ ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used);
+ if (ret) {
+ DRM_ERROR("Failed to create device file mem_info_preempt_used\n");
+ return ret;
+ }
+
+ ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, man);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_preempt_mgr_fini - free and destroy the PREEMPT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the PREEMPT manager. Bails out early, without cleaning
+ * up, if resources are still allocated inside it.
+ */
+void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
+{
+ struct ttm_resource_manager *man = &adev->mman.preempt_mgr;
+ int ret;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
new file mode 100644
index 000000000000..0b10497d487c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -0,0 +1,4533 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_xgmi.h"
+#include "soc15_common.h"
+#include "psp_v3_1.h"
+#include "psp_v10_0.h"
+#include "psp_v11_0.h"
+#include "psp_v11_0_8.h"
+#include "psp_v12_0.h"
+#include "psp_v13_0.h"
+#include "psp_v13_0_4.h"
+#include "psp_v14_0.h"
+
+#include "amdgpu_ras.h"
+#include "amdgpu_securedisplay.h"
+#include "amdgpu_atomfirmware.h"
+
+#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*16)
+
+static int psp_load_smu_fw(struct psp_context *psp);
+static int psp_rap_terminate(struct psp_context *psp);
+static int psp_securedisplay_terminate(struct psp_context *psp);
+
+static int psp_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ring = &psp->km_ring;
+
+ ring->ring_type = ring_type;
+
+ /* allocate a 4K page of local frame buffer memory for the ring */
+ ring->ring_size = 0x1000;
+ ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+ if (ret) {
+ ring->ring_size = 0;
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Due to DF Cstate management centralized to PMFW, the firmware
+ * loading sequence will be updated as below:
+ * - Load KDB
+ * - Load SYS_DRV
+ * - Load tOS
+ * - Load PMFW
+ * - Setup TMR
+ * - Load other non-psp fw
+ * - Load ASD
+ * - Load XGMI/RAS/HDCP/DTM TA if any
+ *
+ * This new sequence is required for
+ * - Arcturus and onwards
+ */
+static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ psp->pmfw_centralized_cstate_management = false;
+ return;
+ }
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 7):
+ psp->pmfw_centralized_cstate_management = true;
+ break;
+ default:
+ psp->pmfw_centralized_cstate_management = false;
+ break;
+ }
+}
+
+static int psp_init_sriov_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ char ucode_prefix[30];
+ int ret = 0;
+
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(11, 0, 9):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 2):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ ret &= psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 0):
+ adev->virt.autoload_ucode_id = 0;
+ break;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ ret &= psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 10):
+ adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
+ ret = psp_init_cap_microcode(psp, ucode_prefix);
+ break;
+ case IP_VERSION(13, 0, 12):
+ ret = psp_init_ta_microcode(psp, ucode_prefix);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return ret;
+}
+
+static int psp_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct psp_context *psp = &adev->psp;
+
+ psp->autoload_supported = true;
+ psp->boot_time_tmr = true;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ psp_v3_1_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ psp_v10_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 4):
+ psp_v11_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 7):
+ adev->psp.sup_pd_fw_up = !amdgpu_sriov_vf(adev);
+ fallthrough;
+ case IP_VERSION(11, 0, 5):
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 11):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 0, 12):
+ case IP_VERSION(11, 0, 13):
+ psp_v11_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(12, 0, 1):
+ psp_v12_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(13, 0, 2):
+ psp->boot_time_tmr = false;
+ fallthrough;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ break;
+ case IP_VERSION(13, 0, 12):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ break;
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 5):
+ case IP_VERSION(13, 0, 8):
+ case IP_VERSION(13, 0, 11):
+ case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 4):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(11, 0, 8):
+ if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
+ psp_v11_0_8_set_psp_funcs(psp);
+ }
+ psp->autoload_supported = false;
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
+ case IP_VERSION(13, 0, 10):
+ psp_v13_0_set_psp_funcs(psp);
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(13, 0, 4):
+ psp_v13_0_4_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ psp_v14_0_set_psp_funcs(psp);
+ break;
+ case IP_VERSION(14, 0, 5):
+ psp_v14_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ psp->adev = adev;
+
+ adev->psp_timeout = 20000;
+
+ psp_check_pmfw_centralized_cstate_management(psp);
+
+ if (amdgpu_sriov_vf(adev))
+ return psp_init_sriov_microcode(psp);
+ else
+ return psp_init_microcode(psp);
+}
+
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
+{
+ amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr,
+ &mem_ctx->shared_buf);
+ mem_ctx->shared_bo = NULL;
+}
+
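+/* Release the TMR buffer and every TA shared-memory context. */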
+static void psp_free_shared_bufs(struct psp_context *psp)
+{
+ void *tmr_buf;
+ void **pptr;
+
+ /* free TMR memory buffer */
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+ psp->tmr_bo = NULL;
+
+ /* free xgmi shared memory */
+ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context);
+
+ /* free ras shared memory */
+ psp_ta_free_shared_buf(&psp->ras_context.context.mem_context);
+
+ /* free hdcp shared memory */
+ psp_ta_free_shared_buf(&psp->hdcp_context.context.mem_context);
+
+ /* free dtm shared memory */
+ psp_ta_free_shared_buf(&psp->dtm_context.context.mem_context);
+
+ /* free rap shared memory */
+ psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
+
+ /* free securedisplay shared memory */
+ psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
+}
+
+static void psp_memory_training_fini(struct psp_context *psp)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ kfree(ctx->sys_cache);
+ ctx->sys_cache = NULL;
+}
+
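+/*
+ * Allocate the system-memory cache used to back up the DRAM training data
+ * when two-stage memory training is supported.
+ */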
+static int psp_memory_training_init(struct psp_context *psp)
+{
+ int ret;
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+
+ if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) {
+ dev_dbg(psp->adev->dev, "memory training is not supported!\n");
+ return 0;
+ }
+
+ ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL);
+ if (ctx->sys_cache == NULL) {
+ dev_err(psp->adev->dev, "alloc mem_train_ctx.sys_cache failed!\n");
+ ret = -ENOMEM;
+ goto Err_out;
+ }
+
+ dev_dbg(psp->adev->dev,
+ "train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+ ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS;
+ return 0;
+
+Err_out:
+ psp_memory_training_fini(psp);
+ return ret;
+}
+
+/*
+ * Helper function to query a psp runtime database entry
+ *
+ * @adev: amdgpu_device pointer
+ * @entry_type: the type of psp runtime database entry
+ * @db_entry: runtime database entry pointer
+ *
+ * Return false if the runtime database doesn't exist or the entry is invalid,
+ * or true if the specific database entry is found and copied to @db_entry
+ */
+static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
+ enum psp_runtime_entry_type entry_type,
+ void *db_entry)
+{
+ uint64_t db_header_pos, db_dir_pos;
+ struct psp_runtime_data_header db_header = {0};
+ struct psp_runtime_data_directory db_dir = {0};
+ bool ret = false;
+ int i;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))
+ return false;
+
+ db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
+ db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header);
+
+ /* read runtime db header from vram */
+ amdgpu_device_vram_access(adev, db_header_pos, (uint32_t *)&db_header,
+ sizeof(struct psp_runtime_data_header), false);
+
+ if (db_header.cookie != PSP_RUNTIME_DB_COOKIE_ID) {
+ /* runtime db doesn't exist, exit */
+ dev_dbg(adev->dev, "PSP runtime database doesn't exist\n");
+ return false;
+ }
+
+ /* read runtime database entry from vram */
+ amdgpu_device_vram_access(adev, db_dir_pos, (uint32_t *)&db_dir,
+ sizeof(struct psp_runtime_data_directory), false);
+
+ if (db_dir.entry_count >= PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT) {
+ /* invalid db entry count, exit */
+ dev_warn(adev->dev, "Invalid PSP runtime database entry count\n");
+ return false;
+ }
+
+ /* look up for requested entry type */
+ for (i = 0; i < db_dir.entry_count && !ret; i++) {
+ if (db_dir.entry_list[i].entry_type == entry_type) {
+ switch (entry_type) {
+ case PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG:
+ if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_boot_cfg_entry)) {
+ /* invalid db entry size */
+ dev_warn(adev->dev, "Invalid PSP runtime database boot cfg entry size\n");
+ return false;
+ }
+ /* read runtime database entry */
+ amdgpu_device_vram_access(adev, db_header_pos + db_dir.entry_list[i].offset,
+ (uint32_t *)db_entry, sizeof(struct psp_runtime_boot_cfg_entry), false);
+ ret = true;
+ break;
+ case PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS:
+ if (db_dir.entry_list[i].size < sizeof(struct psp_runtime_scpm_entry)) {
+ /* invalid db entry size */
+ dev_warn(adev->dev, "Invalid PSP runtime database scpm entry size\n");
+ return false;
+ }
+ /* read runtime database entry */
+ amdgpu_device_vram_access(adev, db_header_pos + db_dir.entry_list[i].offset,
+ (uint32_t *)db_entry, sizeof(struct psp_runtime_scpm_entry), false);
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
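+/*
+ * Read the SCPM and boot-config entries from the PSP runtime database,
+ * run two-stage memory training when required, and allocate the firmware
+ * private, fence and command buffers used to talk to the PSP.
+ */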
+static int psp_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct psp_context *psp = &adev->psp;
+ int ret;
+ struct psp_runtime_boot_cfg_entry boot_cfg_entry;
+ struct psp_memory_training_context *mem_training_ctx = &psp->mem_train_ctx;
+ struct psp_runtime_scpm_entry scpm_entry;
+
+ psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!psp->cmd) {
+ dev_err(adev->dev, "Failed to allocate memory to command buffer!\n");
+ return -ENOMEM;
+ }
+
+ adev->psp.xgmi_context.supports_extended_data =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2);
+
+ memset(&scpm_entry, 0, sizeof(scpm_entry));
+ if ((psp_get_runtime_db_entry(adev,
+ PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS,
+ &scpm_entry)) &&
+ (scpm_entry.scpm_status != SCPM_DISABLE)) {
+ adev->scpm_enabled = true;
+ adev->scpm_status = scpm_entry.scpm_status;
+ } else {
+ adev->scpm_enabled = false;
+ adev->scpm_status = SCPM_DISABLE;
+ }
+
+ /* TODO: stop gpu driver services and print alarm if scpm is enabled with error status */
+
+ memset(&boot_cfg_entry, 0, sizeof(boot_cfg_entry));
+ if (psp_get_runtime_db_entry(adev,
+ PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG,
+ &boot_cfg_entry)) {
+ psp->boot_cfg_bitmask = boot_cfg_entry.boot_cfg_bitmask;
+ if ((psp->boot_cfg_bitmask) &
+ BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING) {
+ /* If psp runtime database exists, then
+ * only enable two stage memory training
+ * when TWO_STAGE_DRAM_TRAINING bit is set
+ * in runtime database
+ */
+ mem_training_ctx->enable_mem_training = true;
+ }
+
+ } else {
+ /* If psp runtime database doesn't exist or is
+ * invalid, force enable two stage memory training
+ */
+ mem_training_ctx->enable_mem_training = true;
+ }
+
+ if (mem_training_ctx->enable_mem_training) {
+ ret = psp_memory_training_init(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to initialize memory training!\n");
+ return ret;
+ }
+
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT);
+ if (ret) {
+ dev_err(adev->dev, "Failed to process memory training!\n");
+ return ret;
+ }
+ }
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr,
+ &psp->fw_pri_buf);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr,
+ &psp->fence_buf);
+ if (ret)
+ goto failed1;
+
+ ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+ (void **)&psp->cmd_buf_mem);
+ if (ret)
+ goto failed2;
+
+ return 0;
+
+failed2:
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+failed1:
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+ return ret;
+}
+
+static int psp_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct psp_context *psp = &adev->psp;
+
+ psp_memory_training_fini(psp);
+
+ amdgpu_ucode_release(&psp->sos_fw);
+ amdgpu_ucode_release(&psp->asd_fw);
+ amdgpu_ucode_release(&psp->ta_fw);
+ amdgpu_ucode_release(&psp->cap_fw);
+ amdgpu_ucode_release(&psp->toc_fw);
+
+ kfree(psp->cmd);
+ psp->cmd = NULL;
+
+ psp_free_shared_bufs(psp);
+
+ if (psp->km_ring.ring_mem)
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &psp->km_ring.ring_mem_mc_addr,
+ (void **)&psp->km_ring.ring_mem);
+
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+ amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+ &psp->fence_buf_mc_addr, &psp->fence_buf);
+ amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+ (void **)&psp->cmd_buf_mem);
+
+ return 0;
+}
+
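+/*
+ * Poll a PSP register until it matches the expected value (or, with
+ * PSP_WAITREG_CHANGED, until it differs from it), bounded by
+ * adev->usec_timeout microseconds.
+ */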
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t reg_val,
+ uint32_t mask, uint32_t flags)
+{
+ bool check_changed = flags & PSP_WAITREG_CHANGED;
+ bool verbose = !(flags & PSP_WAITREG_NOVERBOSE);
+ uint32_t val;
+ int i;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ val = RREG32(reg_index);
+ if (check_changed) {
+ if (val != reg_val)
+ return 0;
+ } else {
+ if ((val & mask) == reg_val)
+ return 0;
+ }
+ udelay(1);
+ }
+
+ if (verbose)
+ dev_err(adev->dev,
+ "psp reg (0x%x) wait timed out, mask: %x, read: %x exp: %x",
+ reg_index, mask, val, reg_val);
+
+ return -ETIME;
+}
+
+int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t reg_val, uint32_t mask, uint32_t msec_timeout)
+{
+ uint32_t val;
+ int i;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ for (i = 0; i < msec_timeout; i++) {
+ val = RREG32(reg_index);
+ if ((val & mask) == reg_val)
+ return 0;
+ msleep(1);
+ }
+
+ return -ETIME;
+}
+
+static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
+{
+ switch (cmd_id) {
+ case GFX_CMD_ID_LOAD_TA:
+ return "LOAD_TA";
+ case GFX_CMD_ID_UNLOAD_TA:
+ return "UNLOAD_TA";
+ case GFX_CMD_ID_INVOKE_CMD:
+ return "INVOKE_CMD";
+ case GFX_CMD_ID_LOAD_ASD:
+ return "LOAD_ASD";
+ case GFX_CMD_ID_SETUP_TMR:
+ return "SETUP_TMR";
+ case GFX_CMD_ID_LOAD_IP_FW:
+ return "LOAD_IP_FW";
+ case GFX_CMD_ID_DESTROY_TMR:
+ return "DESTROY_TMR";
+ case GFX_CMD_ID_SAVE_RESTORE:
+ return "SAVE_RESTORE_IP_FW";
+ case GFX_CMD_ID_SETUP_VMR:
+ return "SETUP_VMR";
+ case GFX_CMD_ID_DESTROY_VMR:
+ return "DESTROY_VMR";
+ case GFX_CMD_ID_PROG_REG:
+ return "PROG_REG";
+ case GFX_CMD_ID_GET_FW_ATTESTATION:
+ return "GET_FW_ATTESTATION";
+ case GFX_CMD_ID_LOAD_TOC:
+ return "ID_LOAD_TOC";
+ case GFX_CMD_ID_AUTOLOAD_RLC:
+ return "AUTOLOAD_RLC";
+ case GFX_CMD_ID_BOOT_CFG:
+ return "BOOT_CFG";
+ case GFX_CMD_ID_CONFIG_SQ_PERFMON:
+ return "CONFIG_SQ_PERFMON";
+ case GFX_CMD_ID_FB_FW_RESERV_ADDR:
+ return "FB_FW_RESERV_ADDR";
+ case GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR:
+ return "FB_FW_RESERV_EXT_ADDR";
+ case GFX_CMD_ID_SRIOV_SPATIAL_PART:
+ return "SPATIAL_PARTITION";
+ case GFX_CMD_ID_FB_NPS_MODE:
+ return "NPS_MODE_CHANGE";
+ default:
+ return "UNKNOWN CMD";
+ }
+}
+
+static bool psp_err_warn(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem;
+
+ /* This response indicates reg list is already loaded */
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW &&
+ cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST &&
+ cmd->resp.status == TEE_ERROR_CANCEL)
+ return false;
+
+ return true;
+}
+
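+/*
+ * Copy the command into the ring-visible command buffer, submit it to the
+ * PSP ring and poll the fence buffer until the PSP responds, a RAS
+ * interrupt fires or the timeout expires.
+ */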
+static int
+psp_cmd_submit_buf(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr)
+{
+ int ret;
+ int index;
+ int timeout = psp->adev->psp_timeout;
+ bool ras_intr = false;
+ bool skip_unsupport = false;
+
+ if (psp->adev->no_hw_access)
+ return 0;
+
+ memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
+
+ memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
+
+ index = atomic_inc_return(&psp->fence_value);
+ ret = psp_ring_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);
+ if (ret) {
+ atomic_dec(&psp->fence_value);
+ goto exit;
+ }
+
+ amdgpu_device_invalidate_hdp(psp->adev, NULL);
+ while (*((unsigned int *)psp->fence_buf) != index) {
+ if (--timeout == 0)
+ break;
+ /*
+ * Don't wait for the timeout when err_event_athub occurs,
+ * because the gpu reset thread has been triggered and the lock
+ * resource should be released for the psp resume sequence.
+ */
+ ras_intr = amdgpu_ras_intr_triggered();
+ if (ras_intr)
+ break;
+ usleep_range(10, 100);
+ amdgpu_device_invalidate_hdp(psp->adev, NULL);
+ }
+
+ /* We allow TEE_ERROR_NOT_SUPPORTED for VMR command and PSP_ERR_UNKNOWN_COMMAND in SRIOV */
+ skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||
+ psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev);
+
+ memcpy(&cmd->resp, &psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp));
+
+ /* In some cases the psp response status is not 0 even though there is
+ * no problem while the command is submitted; some versions of PSP FW
+ * don't write 0 to that field.
+ * So only print a warning instead of an error during psp
+ * initialization to avoid breaking hw_init, and don't return -EINVAL
+ * in that case.
+ */
+ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
+ if (ucode)
+ dev_warn(psp->adev->dev,
+ "failed to load ucode %s(0x%X) ",
+ amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
+ if (psp_err_warn(psp))
+ dev_warn(
+ psp->adev->dev,
+ "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+ psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
+ /* If any firmware (including CAP) load fails under SRIOV, it should
+ * return failure to stop the VF from initializing.
+ * Also return failure in case of timeout
+ */
+ if ((ucode && amdgpu_sriov_vf(psp->adev)) || !timeout) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ }
+
+ if (ucode) {
+ ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
+ ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
+ }
+
+exit:
+ return ret;
+}
+
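+/*
+ * The single psp->cmd buffer is serialized by psp->mutex: acquire takes
+ * the lock and clears the buffer, release drops the lock.
+ */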
+static struct psp_gfx_cmd_resp *acquire_psp_cmd_buf(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd;
+
+ mutex_lock(&psp->mutex);
+
+ memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
+
+ return cmd;
+}
+
+static void release_psp_cmd_buf(struct psp_context *psp)
+{
+ mutex_unlock(&psp->mutex);
+}
+
+static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
+ struct psp_gfx_cmd_resp *cmd,
+ uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t size = 0;
+ uint64_t tmr_pa = 0;
+
+ if (tmr_bo) {
+ size = amdgpu_bo_size(tmr_bo);
+ tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo);
+ }
+
+ if (amdgpu_sriov_vf(psp->adev))
+ cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
+ else
+ cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
+ cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
+ cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
+ cmd->cmd.cmd_setup_tmr.buf_size = size;
+ cmd->cmd.cmd_setup_tmr.bitfield.virt_phy_addr = 1;
+ cmd->cmd.cmd_setup_tmr.system_phy_addr_lo = lower_32_bits(tmr_pa);
+ cmd->cmd.cmd_setup_tmr.system_phy_addr_hi = upper_32_bits(tmr_pa);
+}
+
+static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t pri_buf_mc, uint32_t size)
+{
+ cmd->cmd_id = GFX_CMD_ID_LOAD_TOC;
+ cmd->cmd.cmd_load_toc.toc_phy_addr_lo = lower_32_bits(pri_buf_mc);
+ cmd->cmd.cmd_load_toc.toc_phy_addr_hi = upper_32_bits(pri_buf_mc);
+ cmd->cmd.cmd_load_toc.toc_size = size;
+}
+
+/* Issue the LOAD TOC cmd to the PSP to parse the TOC and calculate the TMR size needed */
+static int psp_load_toc(struct psp_context *psp,
+ uint32_t *tmr_size)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ /* Copy toc to psp firmware private buffer */
+ psp_copy_fw(psp, psp->toc.start_addr, psp->toc.size_bytes);
+
+ psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc.size_bytes);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (!ret)
+ *tmr_size = psp->cmd_buf_mem->resp.tmr_size;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+/* Set up Trusted Memory Region */
+static int psp_tmr_init(struct psp_context *psp)
+{
+ int ret = 0;
+ int tmr_size;
+ void *tmr_buf;
+ void **pptr;
+
+ /*
+ * According to the HW engineers, the TMR address should be "naturally
+ * aligned", i.e. the start address should be an integer multiple of the
+ * TMR size.
+ *
+ * Note: this memory must stay reserved until the driver is
+ * uninitialized.
+ */
+ tmr_size = PSP_TMR_SIZE(psp->adev);
+
+ /* For ASICs that support RLC autoload, the psp will parse the toc
+ * and calculate the total TMR size needed
+ */
+ if (!amdgpu_sriov_vf(psp->adev) &&
+ psp->toc.start_addr &&
+ psp->toc.size_bytes &&
+ psp->fw_pri_buf) {
+ ret = psp_load_toc(psp, &tmr_size);
+ if (ret) {
+ dev_err(psp->adev->dev, "Failed to load toc\n");
+ return ret;
+ }
+ }
+
+ if (!psp->tmr_bo && !psp->boot_time_tmr) {
+ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size,
+ PSP_TMR_ALIGNMENT,
+ AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM,
+ &psp->tmr_bo, &psp->tmr_mc_addr,
+ pptr);
+ }
+ if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) && psp->tmr_bo)
+ psp->tmr_mc_addr = amdgpu_bo_fb_aper_addr(psp->tmr_bo);
+
+ return ret;
+}
+
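+/* ASICs on which the host driver owns the TMR, so SRIOV guests skip it. */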
+static bool psp_skip_tmr(struct psp_context *psp)
+{
+ switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int psp_tmr_load(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /* For Navi12/Sienna Cichlid and the other ASICs covered by
+ * psp_skip_tmr(), do not set up the TMR under SRIOV; it is
+ * already set up by the host driver.
+ */
+ if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
+ if (psp->tmr_bo)
+ dev_info(psp->adev->dev, "reserve 0x%lx from 0x%llx for PSP TMR\n",
+ amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp,
+ struct psp_gfx_cmd_resp *cmd)
+{
+ if (amdgpu_sriov_vf(psp->adev))
+ cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR;
+ else
+ cmd->cmd_id = GFX_CMD_ID_DESTROY_TMR;
+}
+
+static int psp_tmr_unload(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ /* Skip TMR unload for Navi12/Sienna Cichlid and the other ASICs covered
+ * by psp_skip_tmr() under SRIOV, as the guest never loaded the TMR
+ */
+ if (amdgpu_sriov_vf(psp->adev) && psp_skip_tmr(psp))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_tmr_unload_cmd_buf(psp, cmd);
+ dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_tmr_terminate(struct psp_context *psp)
+{
+ return psp_tmr_unload(psp);
+}
+
+int psp_get_fw_attestation_records_addr(struct psp_context *psp,
+ uint64_t *output_ptr)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (!output_ptr)
+ return -EINVAL;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_GET_FW_ATTESTATION;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ if (!ret) {
+ *output_ptr = ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_lo) +
+ ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_hi << 32);
+ }
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_get_fw_reservation_info(struct psp_context *psp,
+ uint32_t cmd_id,
+ uint64_t *addr,
+ uint32_t *size)
+{
+ int ret;
+ uint32_t status;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = cmd_id;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (ret) {
+ release_psp_cmd_buf(psp);
+ return ret;
+ }
+
+ status = cmd->resp.status;
+ if (status == PSP_ERR_UNKNOWN_COMMAND) {
+ release_psp_cmd_buf(psp);
+ *addr = 0;
+ *size = 0;
+ return 0;
+ }
+
+ *addr = (uint64_t)cmd->resp.uresp.fw_reserve_info.reserve_base_address_hi << 32 |
+ cmd->resp.uresp.fw_reserve_info.reserve_base_address_lo;
+ *size = cmd->resp.uresp.fw_reserve_info.reserve_size;
+
+ release_psp_cmd_buf(psp);
+
+ return 0;
+}
+
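+/*
+ * Query the PSP for its firmware reserved regions and pin matching kernel
+ * BOs at those addresses so the driver does not hand that VRAM out again.
+ */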
+int psp_update_fw_reservation(struct psp_context *psp)
+{
+ int ret;
+ uint64_t reserv_addr, reserv_addr_ext;
+ uint32_t reserv_size, reserv_size_ext, mp0_ip_ver;
+ struct amdgpu_device *adev = psp->adev;
+
+ mp0_ip_ver = amdgpu_ip_version(adev, MP0_HWIP, 0);
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ switch (mp0_ip_ver) {
+ case IP_VERSION(14, 0, 2):
+ if (adev->psp.sos.fw_version < 0x3b0e0d)
+ return 0;
+ break;
+
+ case IP_VERSION(14, 0, 3):
+ if (adev->psp.sos.fw_version < 0x3a0e14)
+ return 0;
+ break;
+
+ default:
+ return 0;
+ }
+
+ ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_ADDR, &reserv_addr, &reserv_size);
+ if (ret)
+ return ret;
+ ret = psp_get_fw_reservation_info(psp, GFX_CMD_ID_FB_FW_RESERV_EXT_ADDR, &reserv_addr_ext, &reserv_size_ext);
+ if (ret)
+ return ret;
+
+ if (reserv_addr != adev->gmc.real_vram_size - reserv_size) {
+ dev_warn(adev->dev, "reserve fw region is not valid!\n");
+ return 0;
+ }
+
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL);
+
+ reserv_size = roundup(reserv_size, SZ_1M);
+
+ ret = amdgpu_bo_create_kernel_at(adev, reserv_addr, reserv_size, &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ dev_err(adev->dev, "reserve fw region failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, NULL);
+ return ret;
+ }
+
+ reserv_size_ext = roundup(reserv_size_ext, SZ_1M);
+
+ ret = amdgpu_bo_create_kernel_at(adev, reserv_addr_ext, reserv_size_ext,
+ &adev->mman.fw_reserved_memory_extend, NULL);
+ if (ret) {
+ dev_err(adev->dev, "reserve extend fw region failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL, NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
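+/* Query the GECC bit from the PSP boot configuration (1 = enabled). */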
+static int psp_boot_config_get(struct amdgpu_device *adev, uint32_t *boot_cfg)
+{
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_BOOT_CFG;
+ cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_GET;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (!ret) {
+ *boot_cfg =
+ (cmd->resp.uresp.boot_cfg.boot_cfg & BOOT_CONFIG_GECC) ? 1 : 0;
+ }
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_boot_config_set(struct amdgpu_device *adev, uint32_t boot_cfg)
+{
+ int ret;
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_BOOT_CFG;
+ cmd->cmd.boot_cfg.sub_cmd = BOOTCFG_CMD_SET;
+ cmd->cmd.boot_cfg.boot_config = boot_cfg;
+ cmd->cmd.boot_cfg.boot_config_valid = boot_cfg;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
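+/* Load the optional register list firmware via the generic LOAD_IP_FW command. */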
+static int psp_rl_load(struct amdgpu_device *adev)
+{
+ int ret;
+ struct psp_context *psp = &adev->psp;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (!is_psp_fw_valid(psp->rl))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, psp->rl.start_addr, psp->rl.size_bytes);
+
+ cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(psp->fw_pri_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(psp->fw_pri_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_size = psp->rl.size_bytes;
+ cmd->cmd.cmd_load_ip_fw.fw_type = GFX_FW_TYPE_REG_LIST;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_memory_partition(struct psp_context *psp, int mode)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_FB_NPS_MODE;
+ cmd->cmd.cmd_memory_part.mode = mode;
+
+ dev_info(psp->adev->dev,
+ "Requesting %d memory partition change through PSP", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "PSP request failed to change to NPS%d mode\n", mode);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_spatial_partition(struct psp_context *psp, int mode)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART;
+ cmd->cmd.cmd_spatial_part.mode = mode;
+
+ dev_info(psp->adev->dev, "Requesting %d partitions through PSP", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_asd_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /* If the PSP version doesn't match the ASD version, ASD loading will fail.
+ * Add a workaround to bypass it for sriov for now.
+ * TODO: add a version check to make it common
+ */
+ if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes)
+ return 0;
+
+ /* bypass asd if display hardware is not available */
+ if (!amdgpu_device_has_display_hardware(psp->adev) &&
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10))
+ return 0;
+
+ psp->asd_context.mem_context.shared_mc_addr = 0;
+ psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE;
+ psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD;
+
+ ret = psp_ta_load(psp, &psp->asd_context);
+ if (!ret)
+ psp->asd_context.initialized = true;
+
+ return ret;
+}
+
+static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t session_id)
+{
+ cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
+ cmd->cmd.cmd_unload_ta.session_id = session_id;
+}
+
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_ta_unload_cmd_buf(cmd, context->session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ context->resp_status = cmd->resp.status;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_asd_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->asd_context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->asd_context);
+ if (!ret)
+ psp->asd_context.initialized = false;
+
+ return ret;
+}
+
+static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t id, uint32_t value)
+{
+ cmd->cmd_id = GFX_CMD_ID_PROG_REG;
+ cmd->cmd.cmd_setup_reg_prog.reg_value = value;
+ cmd->cmd.cmd_setup_reg_prog.reg_id = id;
+}
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+ uint32_t value)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret = 0;
+
+ if (reg >= PSP_REG_LAST)
+ return -EINVAL;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_reg_prog_cmd_buf(cmd, reg, value);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_err(psp->adev->dev, "PSP failed to program reg id %d\n", reg);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t ta_bin_mc,
+ struct ta_context *context)
+{
+ cmd->cmd_id = context->ta_load_type;
+ cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc);
+ cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes;
+
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo =
+ lower_32_bits(context->mem_context.shared_mc_addr);
+ cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi =
+ upper_32_bits(context->mem_context.shared_mc_addr);
+ cmd->cmd.cmd_load_ta.cmd_buf_len = context->mem_context.shared_mem_size;
+}
+
+int psp_ta_init_shared_buf(struct psp_context *psp,
+ struct ta_mem_context *mem_ctx)
+{
+ /*
+ * Allocate 16k of memory, aligned to 4k, from the frame buffer (local
+ * physical) for TA to host communication
+ */
+ return amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &mem_ctx->shared_bo,
+ &mem_ctx->shared_mc_addr,
+ &mem_ctx->shared_buf);
+}
+
+static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t ta_cmd_id,
+ uint32_t session_id)
+{
+ cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
+ cmd->cmd.cmd_invoke_cmd.session_id = session_id;
+ cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
+}
+
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_ta_invoke_cmd_buf(cmd, ta_cmd_id, context->session_id);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ context->resp_status = cmd->resp.status;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
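+/*
+ * Copy the TA binary into the PSP private firmware buffer and issue the
+ * load command; on success the returned session id identifies the TA for
+ * later invoke/unload calls.
+ */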
+int psp_ta_load(struct psp_context *psp, struct ta_context *context)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ psp_copy_fw(psp, context->bin_desc.start_addr,
+ context->bin_desc.size_bytes);
+
+ if (amdgpu_virt_xgmi_migrate_enabled(psp->adev) &&
+ context->mem_context.shared_bo)
+ context->mem_context.shared_mc_addr =
+ amdgpu_bo_fb_aper_addr(context->mem_context.shared_bo);
+
+ psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, context);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ context->resp_status = cmd->resp.status;
+
+ if (!ret)
+ context->session_id = cmd->resp.session_id;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->xgmi_context.context);
+}
+
+int psp_xgmi_terminate(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* XGMI TA unload currently is not supported on Arcturus/Aldebaran A+A */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) ||
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ adev->gmc.xgmi.connected_to_cpu))
+ return 0;
+
+ if (!psp->xgmi_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->xgmi_context.context);
+
+ psp->xgmi_context.context.initialized = false;
+
+ return ret;
+}
+
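+/*
+ * Optionally load the XGMI TA, then initialize an XGMI session and record
+ * the capability flags reported by the TA.
+ */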
+int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ int ret;
+
+ if (!psp->ta_fw ||
+ !psp->xgmi_context.context.bin_desc.size_bytes ||
+ !psp->xgmi_context.context.bin_desc.start_addr)
+ return -ENOENT;
+
+ if (!load_ta)
+ goto invoke;
+
+ psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE;
+ psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->xgmi_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ /* Load XGMI TA */
+ ret = psp_ta_load(psp, &psp->xgmi_context.context);
+ if (!ret)
+ psp->xgmi_context.context.initialized = true;
+ else
+ return ret;
+
+invoke:
+ /* Initialize XGMI session */
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.context.mem_context.shared_buf);
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+ xgmi_cmd->flag_extend_link_record = set_extended_data;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
+
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ /* note down the capability flags for the XGMI TA */
+ psp->xgmi_context.xgmi_ta_caps = xgmi_cmd->caps_flag;
+
+ return ret;
+}
+
+int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ int ret;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID;
+
+ /* Invoke xgmi ta to get hive id */
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ if (ret)
+ return ret;
+
+ *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
+
+ return 0;
+}
+
+int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ int ret;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID;
+
+ /* Invoke xgmi ta to get the node id */
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ if (ret)
+ return ret;
+
+ *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id;
+
+ return 0;
+}
+
+static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp)
+{
+ return (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 2) &&
+ psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >=
+ IP_VERSION(13, 0, 6);
+}
+
+/*
+ * Chips that support extended topology information require the driver to
+ * reflect topology information in the opposite direction. This is
+ * because the TA has already exceeded its link record limit and if the
+ * TA holds bi-directional information, the driver would have to do
+ * multiple fetches instead of just two.
+ */
+static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
+ struct psp_xgmi_node_info node_info)
+{
+ struct amdgpu_device *mirror_adev;
+ struct amdgpu_hive_info *hive;
+ uint64_t src_node_id = psp->adev->gmc.xgmi.node_id;
+ uint64_t dst_node_id = node_info.node_id;
+ uint8_t dst_num_hops = node_info.num_hops;
+ uint8_t dst_is_sharing_enabled = node_info.is_sharing_enabled;
+ uint8_t dst_num_links = node_info.num_links;
+
+ hive = amdgpu_get_xgmi_hive(psp->adev);
+ if (WARN_ON(!hive))
+ return;
+
+ list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) {
+ struct psp_xgmi_topology_info *mirror_top_info;
+ int j;
+
+ if (mirror_adev->gmc.xgmi.node_id != dst_node_id)
+ continue;
+
+ mirror_top_info = &mirror_adev->psp.xgmi_context.top_info;
+ for (j = 0; j < mirror_top_info->num_nodes; j++) {
+ if (mirror_top_info->nodes[j].node_id != src_node_id)
+ continue;
+
+ mirror_top_info->nodes[j].num_hops = dst_num_hops;
+ mirror_top_info->nodes[j].is_sharing_enabled = dst_is_sharing_enabled;
+ /* prevent re-reflection of a 0 num_links value, since the reflection
+ * criterion is based on num_hops (direct or indirect).
+ */
+ if (dst_num_links) {
+ mirror_top_info->nodes[j].num_links = dst_num_links;
+ /* swap src and dst due to frame of reference */
+ for (int k = 0; k < dst_num_links; k++) {
+ mirror_top_info->nodes[j].port_num[k].src_xgmi_port_num =
+ node_info.port_num[k].dst_xgmi_port_num;
+ mirror_top_info->nodes[j].port_num[k].dst_xgmi_port_num =
+ node_info.port_num[k].src_xgmi_port_num;
+ }
+ }
+
+ break;
+ }
+
+ break;
+ }
+
+ amdgpu_put_xgmi_hive(hive);
+}
+
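+/*
+ * Query node topology from the XGMI TA, then fetch per-node link
+ * information and reflect it to peer devices when required.
+ */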
+int psp_xgmi_get_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology,
+ bool get_extended_data)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
+ struct ta_xgmi_cmd_get_topology_info_output *topology_info_output;
+ int i;
+ int ret;
+
+ if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
+ return -EINVAL;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+ xgmi_cmd->flag_extend_link_record = get_extended_data;
+
+ /* Fill in the shared memory with topology information as input */
+ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_TOPOLOGY_INFO;
+ topology_info_input->num_nodes = number_devices;
+
+ for (i = 0; i < topology_info_input->num_nodes; i++) {
+ topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
+ topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
+ topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled;
+ topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
+ }
+
+ /* Invoke xgmi ta to get the topology information */
+ ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_TOPOLOGY_INFO);
+ if (ret)
+ return ret;
+
+ /* Read the output topology information from the shared memory */
+ topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info;
+ topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes;
+ for (i = 0; i < topology->num_nodes; i++) {
+ /* extended data will either be 0 or equal to non-extended data */
+ if (topology_info_output->nodes[i].num_hops)
+ topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops;
+
+ /* non-extended data gets everything here so no need to update */
+ if (!get_extended_data) {
+ topology->nodes[i].node_id = topology_info_output->nodes[i].node_id;
+ topology->nodes[i].is_sharing_enabled =
+ topology_info_output->nodes[i].is_sharing_enabled;
+ topology->nodes[i].sdma_engine =
+ topology_info_output->nodes[i].sdma_engine;
+ }
+
+ }
+
+ /* Invoke xgmi ta again to get the link information */
+ if (psp_xgmi_peer_link_info_supported(psp)) {
+ struct ta_xgmi_cmd_get_peer_link_info *link_info_output;
+ struct ta_xgmi_cmd_get_extend_peer_link_info *link_extend_info_output;
+ bool requires_reflection =
+ (psp->xgmi_context.supports_extended_data &&
+ get_extended_data) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 14) ||
+ amdgpu_sriov_vf(psp->adev);
+ bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG ||
+ amdgpu_sriov_xgmi_ta_ext_peer_link_en(psp->adev);
+
+ /* populate the shared output buffer rather than the cmd input buffer
+ * with node_ids as the input for GET_PEER_LINKS command execution.
+ * This is required for GET_PEER_LINKS per the xgmi ta implementation.
+ * The same requirement applies to the GET_EXTEND_PEER_LINKS command.
+ */
+ if (ta_port_num_support) {
+ link_extend_info_output = &xgmi_cmd->xgmi_out_message.get_extend_link_info;
+
+ for (i = 0; i < topology->num_nodes; i++)
+ link_extend_info_output->nodes[i].node_id = topology->nodes[i].node_id;
+
+ link_extend_info_output->num_nodes = topology->num_nodes;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS;
+ } else {
+ link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info;
+
+ for (i = 0; i < topology->num_nodes; i++)
+ link_info_output->nodes[i].node_id = topology->nodes[i].node_id;
+
+ link_info_output->num_nodes = topology->num_nodes;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS;
+ }
+
+ ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < topology->num_nodes; i++) {
+ uint8_t node_num_links = ta_port_num_support ?
+ link_extend_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links;
+ /* accumulate num_links on extended data */
+ if (get_extended_data) {
+ topology->nodes[i].num_links = topology->nodes[i].num_links + node_num_links;
+ } else {
+ topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
+ topology->nodes[i].num_links : node_num_links;
+ }
+ /* populate the connected port num info if supported and available */
+ if (ta_port_num_support && topology->nodes[i].num_links) {
+ memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+ sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+ }
+
+ /* reflect the topology information for bi-directionality */
+ if (requires_reflection && topology->nodes[i].num_hops)
+ psp_xgmi_reflect_topology_info(psp, topology->nodes[i]);
+ }
+ }
+
+ return 0;
+}
+
+int psp_xgmi_set_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology)
+{
+ struct ta_xgmi_shared_memory *xgmi_cmd;
+ struct ta_xgmi_cmd_get_topology_info_input *topology_info_input;
+ int i;
+
+ if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)
+ return -EINVAL;
+
+ xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.context.mem_context.shared_buf;
+ memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
+
+ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info;
+ xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO;
+ topology_info_input->num_nodes = number_devices;
+
+ for (i = 0; i < topology_info_input->num_nodes; i++) {
+ topology_info_input->nodes[i].node_id = topology->nodes[i].node_id;
+ topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops;
+ topology_info_input->nodes[i].is_sharing_enabled = 1;
+ topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine;
+ }
+
+ /* Invoke xgmi ta to set topology information */
+ return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
+}
+
+// ras begin
+static void psp_ras_ta_check_status(struct psp_context *psp)
+{
+ struct ta_ras_shared_memory *ras_cmd =
+ (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+
+ switch (ras_cmd->ras_status) {
+ case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: cmd failed due to unsupported ip\n");
+ break;
+ case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: cmd failed due to unsupported error injection\n");
+ break;
+ case TA_RAS_STATUS__SUCCESS:
+ break;
+ case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED:
+ if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR)
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: Inject error to critical region is not allowed\n");
+ break;
+ default:
+ dev_warn(psp->adev->dev,
+ "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status);
+ break;
+ }
+}
+
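+/*
+ * Marshal a RAS TA command into the shared buffer, invoke the TA and copy
+ * back any output, all under the ras_context mutex.
+ */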
+static int psp_ras_send_cmd(struct psp_context *psp,
+ enum ras_command cmd_id, void *in, void *out)
+{
+ struct ta_ras_shared_memory *ras_cmd;
+ uint32_t cmd = cmd_id;
+ int ret = 0;
+
+ if (!in)
+ return -EINVAL;
+
+ mutex_lock(&psp->ras_context.mutex);
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+ switch (cmd) {
+ case TA_RAS_COMMAND__ENABLE_FEATURES:
+ case TA_RAS_COMMAND__DISABLE_FEATURES:
+ memcpy(&ras_cmd->ras_in_message,
+ in, sizeof(ras_cmd->ras_in_message));
+ break;
+ case TA_RAS_COMMAND__TRIGGER_ERROR:
+ memcpy(&ras_cmd->ras_in_message.trigger_error,
+ in, sizeof(ras_cmd->ras_in_message.trigger_error));
+ break;
+ case TA_RAS_COMMAND__QUERY_ADDRESS:
+ memcpy(&ras_cmd->ras_in_message.address,
+ in, sizeof(ras_cmd->ras_in_message.address));
+ break;
+ default:
+ dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd);
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ ras_cmd->cmd_id = cmd;
+ ret = psp_ras_invoke(psp, ras_cmd->cmd_id);
+
+ switch (cmd) {
+ case TA_RAS_COMMAND__TRIGGER_ERROR:
+ if (!ret && out)
+ memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status));
+ break;
+ case TA_RAS_COMMAND__QUERY_ADDRESS:
+ if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status)
+ ret = -EINVAL;
+ else if (out)
+ memcpy(out,
+ &ras_cmd->ras_out_message.address,
+ sizeof(ras_cmd->ras_out_message.address));
+ break;
+ default:
+ break;
+ }
+
+err_out:
+ mutex_unlock(&psp->ras_context.mutex);
+
+ return ret;
+}
+
+int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ struct ta_ras_shared_memory *ras_cmd;
+ int ret;
+
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ ret = psp_ta_invoke(psp, ta_cmd_id, &psp->ras_context.context);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret;
+
+ if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) {
+ dev_warn(psp->adev->dev, "RAS: Unsupported Interface\n");
+ return -EINVAL;
+ }
+
+ if (!ret) {
+ if (ras_cmd->ras_out_message.flags.err_inject_switch_disable_flag) {
+ dev_warn(psp->adev->dev, "ECC switch disabled\n");
+
+ ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE;
+ } else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag)
+ dev_warn(psp->adev->dev,
+ "RAS internal register access blocked\n");
+
+ psp_ras_ta_check_status(psp);
+ }
+
+ return ret;
+}
+
+int psp_ras_enable_features(struct psp_context *psp,
+ union ta_ras_cmd_input *info, bool enable)
+{
+ enum ras_command cmd_id;
+ int ret;
+
+ if (!psp->ras_context.context.initialized || !info)
+ return -EINVAL;
+
+ cmd_id = enable ?
+ TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES;
+ ret = psp_ras_send_cmd(psp, cmd_id, info, NULL);
+ if (ret)
+ return -EINVAL;
+
+ return 0;
+}
+
+int psp_ras_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->ras_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->ras_context.context);
+
+ psp->ras_context.context.initialized = false;
+
+ mutex_destroy(&psp->ras_context.mutex);
+
+ return ret;
+}
+
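+/*
+ * Check and, if needed, reprogram the GECC boot configuration, then load
+ * the RAS TA with the platform init flags (poison mode, dgpu mode, xcc
+ * mask, NPS mode, active UMC mask).
+ */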
+int psp_ras_initialize(struct psp_context *psp)
+{
+ int ret;
+ uint32_t boot_cfg = 0xFF;
+ struct amdgpu_device *adev = psp->adev;
+ struct ta_ras_shared_memory *ras_cmd;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (!adev->psp.ras_context.context.bin_desc.size_bytes ||
+ !adev->psp.ras_context.context.bin_desc.start_addr) {
+ dev_info(adev->dev, "RAS: optional ras ta ucode is not available\n");
+ return 0;
+ }
+
+ if (amdgpu_atomfirmware_dynamic_boot_config_supported(adev)) {
+ /* query GECC enablement status from boot config
+ * boot_cfg: 1: GECC is enabled or 0: GECC is disabled
+ */
+ ret = psp_boot_config_get(adev, &boot_cfg);
+ if (ret)
+ dev_warn(adev->dev, "PSP get boot config failed\n");
+
+ if (boot_cfg == 1 && !adev->ras_default_ecc_enabled &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ dev_warn(adev->dev, "GECC is currently enabled, which may affect performance\n");
+ dev_warn(adev->dev,
+ "To disable GECC, please reboot the system and load the amdgpu driver with the parameter amdgpu_ras_enable=0\n");
+ } else {
+ if ((adev->ras_default_ecc_enabled || amdgpu_ras_enable == 1) &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ if (boot_cfg == 1) {
+ dev_info(adev->dev, "GECC is enabled\n");
+ } else {
+ /* enable GECC in next boot cycle if it is disabled
+ * in boot config, or force enable GECC if we failed to
+ * get the boot configuration
+ */
+ ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ }
+ } else {
+ if (!boot_cfg) {
+ if (!adev->ras_default_ecc_enabled &&
+ amdgpu_ras_enable != 1 &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ dev_warn(adev->dev, "GECC is disabled, set amdgpu_ras_enable=1 to enable GECC in next boot cycle if needed\n");
+ else
+ dev_info(adev->dev, "GECC is disabled\n");
+ } else {
+ /* disable GECC in next boot cycle if ras is
+ * disabled by module parameter amdgpu_ras_enable
+ * and/or amdgpu_ras_mask, or the boot_config_get
+ * call failed
+ */
+ ret = psp_boot_config_set(adev, 0);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
+ }
+ }
+ }
+ }
+
+ psp->ras_context.context.mem_context.shared_mem_size = PSP_RAS_SHARED_MEM_SIZE;
+ psp->ras_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->ras_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf;
+ memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory));
+
+ if (amdgpu_ras_is_poison_mode_supported(adev))
+ ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+ ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
+ ras_cmd->ras_in_message.init_flags.xcc_mask =
+ adev->gfx.xcc_mask;
+ ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2;
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ ras_cmd->ras_in_message.init_flags.nps_mode =
+ adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
+
+ ret = psp_ta_load(psp, &psp->ras_context.context);
+
+ if (!ret && !ras_cmd->ras_status) {
+ psp->ras_context.context.initialized = true;
+ mutex_init(&psp->ras_context.mutex);
+ } else {
+ if (ras_cmd->ras_status)
+ dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
+
+ /* fail to load RAS TA */
+ psp->ras_context.context.initialized = false;
+ }
+
+ return ret;
+}
+
+int psp_ras_trigger_error(struct psp_context *psp,
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+ uint32_t dev_mask;
+ uint32_t ras_status = 0;
+
+ if (!psp->ras_context.context.initialized || !info)
+ return -EINVAL;
+
+ switch (info->block_id) {
+ case TA_RAS_BLOCK__GFX:
+ dev_mask = GET_MASK(GC, instance_mask);
+ break;
+ case TA_RAS_BLOCK__SDMA:
+ dev_mask = GET_MASK(SDMA0, instance_mask);
+ break;
+ case TA_RAS_BLOCK__VCN:
+ case TA_RAS_BLOCK__JPEG:
+ dev_mask = GET_MASK(VCN, instance_mask);
+ break;
+ default:
+ dev_mask = instance_mask;
+ break;
+ }
+
+ /* reuse sub_block_index for backward compatibility */
+ dev_mask <<= AMDGPU_RAS_INST_SHIFT;
+ dev_mask &= AMDGPU_RAS_INST_MASK;
+ info->sub_block_index |= dev_mask;
+
+ ret = psp_ras_send_cmd(psp,
+ TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status);
+ if (ret)
+ return -EINVAL;
+
+ /* If err_event_athub occurs, the error injection was successful;
+ * however, the return status from the TA is no longer reliable
+ */
+ if (amdgpu_ras_intr_triggered())
+ return 0;
+
+ if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED)
+ return -EACCES;
+ else if (ras_status)
+ return -EINVAL;
+
+ return 0;
+}
+
+int psp_ras_query_address(struct psp_context *psp,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out)
+{
+ int ret;
+
+ if (!psp->ras_context.context.initialized ||
+ !addr_in || !addr_out)
+ return -EINVAL;
+
+ ret = psp_ras_send_cmd(psp,
+ TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out);
+
+ return ret;
+}
+// ras end
+
+// HDCP start
+static int psp_hdcp_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ /* bypass hdcp initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
+ if (!psp->hdcp_context.context.bin_desc.size_bytes ||
+ !psp->hdcp_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n");
+ return 0;
+ }
+
+ psp->hdcp_context.context.mem_context.shared_mem_size = PSP_HDCP_SHARED_MEM_SIZE;
+ psp->hdcp_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->hdcp_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->hdcp_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_ta_load(psp, &psp->hdcp_context.context);
+ if (!ret) {
+ psp->hdcp_context.context.initialized = true;
+ mutex_init(&psp->hdcp_context.mutex);
+ }
+
+ return ret;
+}
+
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->hdcp_context.context.initialized)
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context);
+}
+
+static int psp_hdcp_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->hdcp_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->hdcp_context.context);
+
+ psp->hdcp_context.context.initialized = false;
+
+ return ret;
+}
+// HDCP end
+
+// DTM start
+static int psp_dtm_initialize(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ /* bypass dtm initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
+ if (!psp->dtm_context.context.bin_desc.size_bytes ||
+ !psp->dtm_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n");
+ return 0;
+ }
+
+ psp->dtm_context.context.mem_context.shared_mem_size = PSP_DTM_SHARED_MEM_SIZE;
+ psp->dtm_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->dtm_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->dtm_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_ta_load(psp, &psp->dtm_context.context);
+ if (!ret) {
+ psp->dtm_context.context.initialized = true;
+ mutex_init(&psp->dtm_context.mutex);
+ }
+
+ return ret;
+}
+
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ /*
+ * TODO: bypass the loading in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->dtm_context.context.initialized)
+ return 0;
+
+ return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context);
+}
+
+static int psp_dtm_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->dtm_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->dtm_context.context);
+
+ psp->dtm_context.context.initialized = false;
+
+ return ret;
+}
+// DTM end
+
+// RAP start
+static int psp_rap_initialize(struct psp_context *psp)
+{
+ int ret;
+ enum ta_rap_status status = TA_RAP_STATUS__SUCCESS;
+
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->rap_context.context.bin_desc.size_bytes ||
+ !psp->rap_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev, "RAP: optional rap ta ucode is not available\n");
+ return 0;
+ }
+
+ psp->rap_context.context.mem_context.shared_mem_size = PSP_RAP_SHARED_MEM_SIZE;
+ psp->rap_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->rap_context.context.mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &psp->rap_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_ta_load(psp, &psp->rap_context.context);
+ if (!ret) {
+ psp->rap_context.context.initialized = true;
+ mutex_init(&psp->rap_context.mutex);
+ } else
+ return ret;
+
+ ret = psp_rap_invoke(psp, TA_CMD_RAP__INITIALIZE, &status);
+ if (ret || status != TA_RAP_STATUS__SUCCESS) {
+ psp_rap_terminate(psp);
+ /* free rap shared memory */
+ psp_ta_free_shared_buf(&psp->rap_context.context.mem_context);
+
+ dev_warn(psp->adev->dev, "RAP TA initialize fail (%d) status %d.\n",
+ ret, status);
+
+ return ret;
+ }
+
+ return 0;
+}
+
+static int psp_rap_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ if (!psp->rap_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->rap_context.context);
+
+ psp->rap_context.context.initialized = false;
+
+ return ret;
+}
+
+int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_status *status)
+{
+ struct ta_rap_shared_memory *rap_cmd;
+ int ret = 0;
+
+ if (!psp->rap_context.context.initialized)
+ return 0;
+
+ if (ta_cmd_id != TA_CMD_RAP__INITIALIZE &&
+ ta_cmd_id != TA_CMD_RAP__VALIDATE_L0)
+ return -EINVAL;
+
+ mutex_lock(&psp->rap_context.mutex);
+
+ rap_cmd = (struct ta_rap_shared_memory *)
+ psp->rap_context.context.mem_context.shared_buf;
+ memset(rap_cmd, 0, sizeof(struct ta_rap_shared_memory));
+
+ rap_cmd->cmd_id = ta_cmd_id;
+ rap_cmd->validation_method_id = METHOD_A;
+
+ ret = psp_ta_invoke(psp, rap_cmd->cmd_id, &psp->rap_context.context);
+ if (ret)
+ goto out_unlock;
+
+ if (status)
+ *status = rap_cmd->rap_status;
+
+out_unlock:
+ mutex_unlock(&psp->rap_context.mutex);
+
+ return ret;
+}
+// RAP end
+
+/* securedisplay start */
+static int psp_securedisplay_initialize(struct psp_context *psp)
+{
+ int ret;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
+
+ /*
+ * TODO: bypass the initialization in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ /* bypass securedisplay initialization if dmu is harvested */
+ if (!amdgpu_device_has_display_hardware(psp->adev))
+ return 0;
+
+ if (!psp->securedisplay_context.context.bin_desc.size_bytes ||
+ !psp->securedisplay_context.context.bin_desc.start_addr) {
+ dev_info(psp->adev->dev,
+ "SECUREDISPLAY: optional securedisplay ta ucode is not available\n");
+ return 0;
+ }
+
+ psp->securedisplay_context.context.mem_context.shared_mem_size =
+ PSP_SECUREDISPLAY_SHARED_MEM_SIZE;
+ psp->securedisplay_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+
+ if (!psp->securedisplay_context.context.initialized) {
+ ret = psp_ta_init_shared_buf(psp,
+ &psp->securedisplay_context.context.mem_context);
+ if (ret)
+ return ret;
+ }
+
+ ret = psp_ta_load(psp, &psp->securedisplay_context.context);
+ if (!ret && !psp->securedisplay_context.context.resp_status) {
+ psp->securedisplay_context.context.initialized = true;
+ mutex_init(&psp->securedisplay_context.mutex);
+ } else {
+ /* don't try again */
+ psp->securedisplay_context.context.bin_desc.size_bytes = 0;
+ return ret;
+ }
+
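+ /* Query the securedisplay TA once to verify it is responsive */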
+ mutex_lock(&psp->securedisplay_context.mutex);
+
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+
+ mutex_unlock(&psp->securedisplay_context.mutex);
+
+ if (ret) {
+ psp_securedisplay_terminate(psp);
+ /* free securedisplay shared memory */
+ psp_ta_free_shared_buf(&psp->securedisplay_context.context.mem_context);
+ dev_err(psp->adev->dev, "SECUREDISPLAY TA initialize fail.\n");
+ return -EINVAL;
+ }
+
+ if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS) {
+ psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
+ dev_err(psp->adev->dev, "SECUREDISPLAY: query securedisplay TA failed. ret 0x%x\n",
+ securedisplay_cmd->securedisplay_out_message.query_ta.query_cmd_ret);
+ /* don't try again */
+ psp->securedisplay_context.context.bin_desc.size_bytes = 0;
+ }
+
+ return 0;
+}
+
+static int psp_securedisplay_terminate(struct psp_context *psp)
+{
+ int ret;
+
+ /*
+ * TODO: bypass the termination in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->securedisplay_context.context.initialized)
+ return 0;
+
+ ret = psp_ta_unload(psp, &psp->securedisplay_context.context);
+
+ psp->securedisplay_context.context.initialized = false;
+
+ return ret;
+}
+
+int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
+{
+ int ret;
+
+ if (!psp->securedisplay_context.context.initialized)
+ return -EINVAL;
+
+ if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA &&
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC &&
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2)
+ return -EINVAL;
+
+ ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context);
+
+ return ret;
+}
+/* SECUREDISPLAY end */
+
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+ int ret = 0;
+
+ if (!amdgpu_sriov_vf(adev) && psp->funcs && psp->funcs->wait_for_bootloader != NULL)
+ ret = psp->funcs->wait_for_bootloader(psp);
+
+ return ret;
+}
+
+bool amdgpu_psp_get_ras_capability(struct psp_context *psp)
+{
+ if (psp->funcs &&
+ psp->funcs->get_ras_capability) {
+ return psp->funcs->get_ras_capability(psp);
+ } else {
+ return false;
+ }
+}
+
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return false;
+
+ if (psp->funcs && psp->funcs->is_reload_needed)
+ return psp->funcs->is_reload_needed(psp);
+
+ return false;
+}
+
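+/* Recompute PSP fw/fence/cmd/ring buffer MC addresses through the FB aperture (used when XGMI migration is enabled) */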
+static void psp_update_gpu_addresses(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (psp->cmd_buf_bo && psp->cmd_buf_mem) {
+ psp->fw_pri_mc_addr = amdgpu_bo_fb_aper_addr(psp->fw_pri_bo);
+ psp->fence_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->fence_buf_bo);
+ psp->cmd_buf_mc_addr = amdgpu_bo_fb_aper_addr(psp->cmd_buf_bo);
+ }
+ if (adev->firmware.rbuf && psp->km_ring.ring_mem)
+ psp->km_ring.ring_mem_mc_addr = amdgpu_bo_fb_aper_addr(adev->firmware.rbuf);
+}
+
+static int psp_hw_start(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ if (amdgpu_virt_xgmi_migrate_enabled(adev))
+ psp_update_gpu_addresses(adev);
+
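+ /* On bare metal, load the PSP bootloader components in sequence before starting SOS */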
+ if (!amdgpu_sriov_vf(adev)) {
+ if ((is_psp_fw_valid(psp->kdb)) &&
+ (psp->funcs->bootloader_load_kdb != NULL)) {
+ ret = psp_bootloader_load_kdb(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load kdb failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->spl)) &&
+ (psp->funcs->bootloader_load_spl != NULL)) {
+ ret = psp_bootloader_load_spl(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load spl failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->sys)) &&
+ (psp->funcs->bootloader_load_sysdrv != NULL)) {
+ ret = psp_bootloader_load_sysdrv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load sys drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->soc_drv)) &&
+ (psp->funcs->bootloader_load_soc_drv != NULL)) {
+ ret = psp_bootloader_load_soc_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load soc drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->intf_drv)) &&
+ (psp->funcs->bootloader_load_intf_drv != NULL)) {
+ ret = psp_bootloader_load_intf_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load intf drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->dbg_drv)) &&
+ (psp->funcs->bootloader_load_dbg_drv != NULL)) {
+ ret = psp_bootloader_load_dbg_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load dbg drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->ras_drv)) &&
+ (psp->funcs->bootloader_load_ras_drv != NULL)) {
+ ret = psp_bootloader_load_ras_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load ras_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->ipkeymgr_drv)) &&
+ (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) {
+ ret = psp_bootloader_load_ipkeymgr_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->spdm_drv)) &&
+ (psp->funcs->bootloader_load_spdm_drv != NULL)) {
+ ret = psp_bootloader_load_spdm_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load spdm_drv failed!\n");
+ return ret;
+ }
+ }
+
+ if ((is_psp_fw_valid(psp->sos)) &&
+ (psp->funcs->bootloader_load_sos != NULL)) {
+ ret = psp_bootloader_load_sos(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load sos failed!\n");
+ return ret;
+ }
+ }
+ }
+
+ ret = psp_ring_create(psp, PSP_RING_TYPE__KM);
+ if (ret) {
+ dev_err(adev->dev, "PSP create ring failed!\n");
+ return ret;
+ }
+
+ if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+ ret = psp_update_fw_reservation(psp);
+ if (ret) {
+ dev_err(adev->dev, "update fw reservation failed!\n");
+ return ret;
+ }
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev))
+ goto skip_pin_bo;
+
+ if (!psp->boot_time_tmr || psp->autoload_supported) {
+ ret = psp_tmr_init(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP tmr init failed!\n");
+ return ret;
+ }
+ }
+
+skip_pin_bo:
+ /*
+ * For ASICs with DF Cstate management centralized
+ * to the PMFW, TMR setup should be performed after the PMFW
+ * is loaded and before other non-PSP firmware is loaded.
+ */
+ if (psp->pmfw_centralized_cstate_management) {
+ ret = psp_load_smu_fw(psp);
+ if (ret)
+ return ret;
+ }
+
+ if (!psp->boot_time_tmr || !psp->autoload_supported) {
+ ret = psp_tmr_load(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load tmr failed!\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
+ enum psp_gfx_fw_type *type)
+{
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_CAP:
+ *type = GFX_FW_TYPE_CAP;
+ break;
+ case AMDGPU_UCODE_ID_SDMA0:
+ *type = GFX_FW_TYPE_SDMA0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA1:
+ *type = GFX_FW_TYPE_SDMA1;
+ break;
+ case AMDGPU_UCODE_ID_SDMA2:
+ *type = GFX_FW_TYPE_SDMA2;
+ break;
+ case AMDGPU_UCODE_ID_SDMA3:
+ *type = GFX_FW_TYPE_SDMA3;
+ break;
+ case AMDGPU_UCODE_ID_SDMA4:
+ *type = GFX_FW_TYPE_SDMA4;
+ break;
+ case AMDGPU_UCODE_ID_SDMA5:
+ *type = GFX_FW_TYPE_SDMA5;
+ break;
+ case AMDGPU_UCODE_ID_SDMA6:
+ *type = GFX_FW_TYPE_SDMA6;
+ break;
+ case AMDGPU_UCODE_ID_SDMA7:
+ *type = GFX_FW_TYPE_SDMA7;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES:
+ *type = GFX_FW_TYPE_CP_MES;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ *type = GFX_FW_TYPE_MES_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1:
+ *type = GFX_FW_TYPE_CP_MES_KIQ;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ *type = GFX_FW_TYPE_MES_KIQ_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ *type = GFX_FW_TYPE_CP_CE;
+ break;
+ case AMDGPU_UCODE_ID_CP_PFP:
+ *type = GFX_FW_TYPE_CP_PFP;
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ *type = GFX_FW_TYPE_CP_ME;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ *type = GFX_FW_TYPE_CP_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ *type = GFX_FW_TYPE_CP_MEC_ME1;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ *type = GFX_FW_TYPE_CP_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ *type = GFX_FW_TYPE_CP_MEC_ME2;
+ break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ *type = GFX_FW_TYPE_RLC_P;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ *type = GFX_FW_TYPE_RLC_V;
+ break;
+ case AMDGPU_UCODE_ID_RLC_G:
+ *type = GFX_FW_TYPE_RLC_G;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ *type = GFX_FW_TYPE_RLC_IRAM;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ *type = GFX_FW_TYPE_RLC_DRAM_BOOT;
+ break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE0_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE1_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE2_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE3_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SMC:
+ *type = GFX_FW_TYPE_SMU;
+ break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ *type = GFX_FW_TYPE_PPTABLE;
+ break;
+ case AMDGPU_UCODE_ID_UVD:
+ *type = GFX_FW_TYPE_UVD;
+ break;
+ case AMDGPU_UCODE_ID_UVD1:
+ *type = GFX_FW_TYPE_UVD1;
+ break;
+ case AMDGPU_UCODE_ID_VCE:
+ *type = GFX_FW_TYPE_VCE;
+ break;
+ case AMDGPU_UCODE_ID_VCN:
+ *type = GFX_FW_TYPE_VCN;
+ break;
+ case AMDGPU_UCODE_ID_VCN1:
+ *type = GFX_FW_TYPE_VCN1;
+ break;
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ *type = GFX_FW_TYPE_DMCU_ERAM;
+ break;
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ *type = GFX_FW_TYPE_DMCU_ISR;
+ break;
+ case AMDGPU_UCODE_ID_VCN0_RAM:
+ *type = GFX_FW_TYPE_VCN0_RAM;
+ break;
+ case AMDGPU_UCODE_ID_VCN1_RAM:
+ *type = GFX_FW_TYPE_VCN1_RAM;
+ break;
+ case AMDGPU_UCODE_ID_DMCUB:
+ *type = GFX_FW_TYPE_DMUB;
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ *type = GFX_FW_TYPE_SDMA_UCODE_TH1;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ *type = GFX_FW_TYPE_IMU_I;
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ *type = GFX_FW_TYPE_IMU_D;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ *type = GFX_FW_TYPE_RS64_PFP;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ *type = GFX_FW_TYPE_RS64_ME;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ *type = GFX_FW_TYPE_RS64_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_PFP_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_PFP_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_ME_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_ME_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P0_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P1_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P2_STACK;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ *type = GFX_FW_TYPE_RS64_MEC_P3_STACK;
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ *type = GFX_FW_TYPE_VPEC_FW1;
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ *type = GFX_FW_TYPE_VPEC_FW2;
+ break;
+ case AMDGPU_UCODE_ID_VPE:
+ *type = GFX_FW_TYPE_VPE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ *type = GFX_FW_TYPE_UMSCH_UCODE;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ *type = GFX_FW_TYPE_UMSCH_DATA;
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ *type = GFX_FW_TYPE_UMSCH_CMD_BUFFER;
+ break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ *type = GFX_FW_TYPE_P2S_TABLE;
+ break;
+ case AMDGPU_UCODE_ID_JPEG_RAM:
+ *type = GFX_FW_TYPE_JPEG_RAM;
+ break;
+ case AMDGPU_UCODE_ID_ISP:
+ *type = GFX_FW_TYPE_ISP;
+ break;
+ case AMDGPU_UCODE_ID_MAXIMUM:
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void psp_print_fw_hdr(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ struct amdgpu_device *adev = psp->adev;
+ struct common_firmware_header *hdr;
+
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ case AMDGPU_UCODE_ID_SDMA1:
+ case AMDGPU_UCODE_ID_SDMA2:
+ case AMDGPU_UCODE_ID_SDMA3:
+ case AMDGPU_UCODE_ID_SDMA4:
+ case AMDGPU_UCODE_ID_SDMA5:
+ case AMDGPU_UCODE_ID_SDMA6:
+ case AMDGPU_UCODE_ID_SDMA7:
+ hdr = (struct common_firmware_header *)
+ adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_PFP:
+ hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ hdr = (struct common_firmware_header *)adev->gfx.me_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_RLC_G:
+ hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(hdr);
+ break;
+ case AMDGPU_UCODE_ID_SMC:
+ hdr = (struct common_firmware_header *)adev->pm.fw->data;
+ amdgpu_ucode_print_smc_hdr(hdr);
+ break;
+ default:
+ break;
+ }
+}
+
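+/* Fill a GFX_CMD_ID_LOAD_IP_FW command with the ucode's GPU address, size and PSP fw type */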
+static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd)
+{
+ int ret;
+ uint64_t fw_mem_mc_addr = ucode->mc_addr;
+
+ cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
+
+ ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
+ if (ret)
+ dev_err(psp->adev->dev, "Unknown firmware type\n");
+
+ return ret;
+}
+
+int psp_execute_ip_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ int ret = 0;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ ret = psp_prep_load_ip_fw_cmd_buf(psp, ucode, cmd);
+ if (!ret) {
+ ret = psp_cmd_submit_buf(psp, ucode, cmd,
+ psp->fence_buf_mc_addr);
+ }
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+static int psp_load_p2s_table(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_device *adev = psp->adev;
+ struct amdgpu_firmware_info *ucode =
+ &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE];
+
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
+ return 0;
+
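+ /* On these MP0 versions the P2S table is only loaded if the SOS firmware is new enough */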
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+ uint32_t supp_vers = adev->flags & AMD_IS_APU ? 0x0036013D :
+ 0x0036003C;
+ if (psp->sos.fw_version < supp_vers)
+ return 0;
+ }
+
+ if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ ret = psp_execute_ip_fw_load(psp, ucode);
+
+ return ret;
+}
+
+static int psp_load_smu_fw(struct psp_context *psp)
+{
+ int ret;
+ struct amdgpu_device *adev = psp->adev;
+ struct amdgpu_firmware_info *ucode =
+ &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
+ struct amdgpu_ras *ras = psp->ras_context.ras;
+
+ /*
+ * Skip SMU FW reloading when using BACO for runpm only,
+ * as the SMU is always alive.
+ */
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
+ return 0;
+
+ if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if ((amdgpu_in_reset(adev) && ras && adev->ras_enabled &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) {
+ ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
+ if (ret)
+ dev_err(adev->dev, "Failed to set MP1 state prepare for reload\n");
+ }
+
+ ret = psp_execute_ip_fw_load(psp, ucode);
+
+ if (ret)
+ dev_err(adev->dev, "PSP load smu failed!\n");
+
+ return ret;
+}
+
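+/* Return true if this ucode should not be sent to PSP here (empty, loaded elsewhere, or handled by autoload) */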
+static bool fw_load_skip_check(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ if (!ucode->fw || !ucode->ucode_size)
+ return true;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_P2S_TABLE)
+ return true;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
+ (psp_smu_reload_quirk(psp) ||
+ psp->autoload_supported ||
+ psp->pmfw_centralized_cstate_management))
+ return true;
+
+ if (amdgpu_sriov_vf(psp->adev) &&
+ amdgpu_virt_fw_load_skip_check(psp->adev, ucode->ucode_id))
+ return true;
+
+ if (psp->autoload_supported &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
+ /* skip mec JT when autoload is enabled */
+ return true;
+
+ return false;
+}
+
+int psp_load_fw_list(struct psp_context *psp,
+ struct amdgpu_firmware_info **ucode_list, int ucode_count)
+{
+ int ret = 0, i;
+ struct amdgpu_firmware_info *ucode;
+
+ for (i = 0; i < ucode_count; ++i) {
+ ucode = ucode_list[i];
+ psp_print_fw_hdr(psp, ucode);
+ ret = psp_execute_ip_fw_load(psp, ucode);
+ if (ret)
+ return ret;
+ }
+ return ret;
+}
+
+static int psp_load_non_psp_fw(struct psp_context *psp)
+{
+ int i, ret;
+ struct amdgpu_firmware_info *ucode;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (psp->autoload_supported &&
+ !psp->pmfw_centralized_cstate_management) {
+ ret = psp_load_smu_fw(psp);
+ if (ret)
+ return ret;
+ }
+
+ /* Load P2S table first if it's available */
+ psp_load_p2s_table(psp);
+
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
+ !fw_load_skip_check(psp, ucode)) {
+ ret = psp_load_smu_fw(psp);
+ if (ret)
+ return ret;
+ continue;
+ }
+
+ if (fw_load_skip_check(psp, ucode))
+ continue;
+
+ if (psp->autoload_supported &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 7) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 11) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(11, 0, 12)) &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3))
+ /* PSP only receives one SDMA fw for sienna_cichlid,
+ * as all four sdma fw images are the same
+ */
+ continue;
+
+ psp_print_fw_hdr(psp, ucode);
+
+ ret = psp_execute_ip_fw_load(psp, ucode);
+ if (ret)
+ return ret;
+
+ /* Start RLC autoload after PSP has received all the gfx firmware */
+ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
+ adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) {
+ ret = psp_rlc_autoload_start(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to start rlc autoload\n");
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int psp_load_fw(struct amdgpu_device *adev)
+{
+ int ret;
+ struct psp_context *psp = &adev->psp;
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+ /* should not destroy ring, only stop */
+ psp_ring_stop(psp, PSP_RING_TYPE__KM);
+ } else {
+ memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
+
+ ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+ if (ret) {
+ dev_err(adev->dev, "PSP ring init failed!\n");
+ goto failed;
+ }
+ }
+
+ ret = psp_hw_start(psp);
+ if (ret)
+ goto failed;
+
+ ret = psp_load_non_psp_fw(psp);
+ if (ret)
+ goto failed1;
+
+ ret = psp_asd_initialize(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load asd failed!\n");
+ goto failed1;
+ }
+
+ ret = psp_rl_load(adev);
+ if (ret) {
+ dev_err(adev->dev, "PSP load RL failed!\n");
+ goto failed1;
+ }
+
+ if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp, false, true);
+ /* Warn on XGMI session initialization failure
+ * instead of stopping driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+ }
+
+ if (psp->ta_fw) {
+ ret = psp_ras_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAS: Failed to initialize RAS\n");
+
+ ret = psp_hdcp_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "HDCP: Failed to initialize HDCP\n");
+
+ ret = psp_dtm_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "DTM: Failed to initialize DTM\n");
+
+ ret = psp_rap_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAP: Failed to initialize RAP\n");
+
+ ret = psp_securedisplay_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "SECUREDISPLAY: Failed to initialize SECUREDISPLAY\n");
+ }
+
+ return 0;
+
+failed1:
+ psp_free_shared_bufs(psp);
+failed:
+ /*
+ * all cleanup jobs (xgmi terminate, ras terminate,
+ * ring destroy, cmd/fence/fw buffers destroy,
+ * psp->cmd destroy) are delayed to psp_hw_fini
+ */
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
+ return ret;
+}
+
+static int psp_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int ret;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ mutex_lock(&adev->firmware.mutex);
+
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
+ ret = psp_load_fw(adev);
+ if (ret) {
+ dev_err(adev->dev, "PSP firmware loading failed\n");
+ goto failed;
+ }
+
+ mutex_unlock(&adev->firmware.mutex);
+ return 0;
+
+failed:
+ adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
+ mutex_unlock(&adev->firmware.mutex);
+ return -EINVAL;
+}
+
+static int psp_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct psp_context *psp = &adev->psp;
+
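+ /* Unload the trusted applications before tearing down ASD, TMR and the KM ring */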
+ if (psp->ta_fw) {
+ psp_ras_terminate(psp);
+ psp_securedisplay_terminate(psp);
+ psp_rap_terminate(psp);
+ psp_dtm_terminate(psp);
+ psp_hdcp_terminate(psp);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ psp_xgmi_terminate(psp);
+ }
+
+ psp_asd_terminate(psp);
+ psp_tmr_terminate(psp);
+
+ psp_ring_destroy(psp, PSP_RING_TYPE__KM);
+
+ return 0;
+}
+
+static int psp_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct psp_context *psp = &adev->psp;
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1 &&
+ psp->xgmi_context.context.initialized) {
+ ret = psp_xgmi_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to terminate xgmi ta\n");
+ goto out;
+ }
+ }
+
+ if (psp->ta_fw) {
+ ret = psp_ras_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to terminate ras ta\n");
+ goto out;
+ }
+ ret = psp_hdcp_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to terminate hdcp ta\n");
+ goto out;
+ }
+ ret = psp_dtm_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to terminate dtm ta\n");
+ goto out;
+ }
+ ret = psp_rap_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to terminate rap ta\n");
+ goto out;
+ }
+ ret = psp_securedisplay_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to terminate securedisplay ta\n");
+ goto out;
+ }
+ }
+
+ ret = psp_asd_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to terminate asd\n");
+ goto out;
+ }
+
+ ret = psp_tmr_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to terminate tmr\n");
+ goto out;
+ }
+
+ ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
+ if (ret)
+ dev_err(adev->dev, "PSP ring stop failed\n");
+
+out:
+ return ret;
+}
+
+static int psp_resume(struct amdgpu_ip_block *ip_block)
+{
+ int ret;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct psp_context *psp = &adev->psp;
+
+ dev_info(adev->dev, "PSP is resuming...\n");
+
+ if (psp->mem_train_ctx.enable_mem_training) {
+ ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME);
+ if (ret) {
+ dev_err(adev->dev, "Failed to process memory training!\n");
+ return ret;
+ }
+ }
+
+ mutex_lock(&adev->firmware.mutex);
+
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
+ ret = psp_hw_start(psp);
+ if (ret)
+ goto failed;
+
+ ret = psp_load_non_psp_fw(psp);
+ if (ret)
+ goto failed;
+
+ ret = psp_asd_initialize(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load asd failed!\n");
+ goto failed;
+ }
+
+ ret = psp_rl_load(adev);
+ if (ret) {
+ dev_err(adev->dev, "PSP load RL failed!\n");
+ goto failed;
+ }
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ ret = psp_xgmi_initialize(psp, false, true);
+ /* Warn on XGMI session initialization failure
+ * instead of stopping driver initialization
+ */
+ if (ret)
+ dev_err(psp->adev->dev,
+ "XGMI: Failed to initialize XGMI session\n");
+ }
+
+ if (psp->ta_fw) {
+ ret = psp_ras_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAS: Failed to initialize RAS\n");
+
+ ret = psp_hdcp_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "HDCP: Failed to initialize HDCP\n");
+
+ ret = psp_dtm_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "DTM: Failed to initialize DTM\n");
+
+ ret = psp_rap_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "RAP: Failed to initialize RAP\n");
+
+ ret = psp_securedisplay_initialize(psp);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "SECUREDISPLAY: Failed to initialize SECUREDISPLAY\n");
+ }
+
+ mutex_unlock(&adev->firmware.mutex);
+
+ return 0;
+
+failed:
+ dev_err(adev->dev, "PSP resume failed\n");
+ mutex_unlock(&adev->firmware.mutex);
+ return ret;
+}
+
+int psp_gpu_reset(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ return 0;
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_mode1_reset(&adev->psp);
+ mutex_unlock(&adev->psp.mutex);
+
+ return ret;
+}
+
+int psp_rlc_autoload_start(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_AUTOLOAD_RLC;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
+int psp_ring_cmd_submit(struct psp_context *psp,
+ uint64_t cmd_buf_mc_addr,
+ uint64_t fence_mc_addr,
+ int index)
+{
+ unsigned int psp_write_ptr_reg = 0;
+ struct psp_gfx_rb_frame *write_frame;
+ struct psp_ring *ring = &psp->km_ring;
+ struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
+ struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
+ ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t ring_size_dw = ring->ring_size / 4;
+ uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
+
+ /* KM (GPCOM) prepare write pointer */
+ psp_write_ptr_reg = psp_ring_get_wptr(psp);
+
+ /* Update KM RB frame pointer to new frame */
+ /* write_frame ptr increments by size of rb_frame in bytes */
+ /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
+ if ((psp_write_ptr_reg % ring_size_dw) == 0)
+ write_frame = ring_buffer_start;
+ else
+ write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
+ /* Check invalid write_frame ptr address */
+ if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
+ dev_err(adev->dev,
+ "ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+ ring_buffer_start, ring_buffer_end, write_frame);
+ dev_err(adev->dev,
+ "write_frame is pointing to address out of bounds\n");
+ return -EINVAL;
+ }
+
+ /* Initialize KM RB frame */
+ memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
+
+ /* Update KM RB frame */
+ write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
+ write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
+ write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
+ write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
+ write_frame->fence_value = index;
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ /* Update the write Pointer in DWORDs */
+ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
+ psp_ring_set_wptr(psp, psp_write_ptr_reg);
+ return 0;
+}
+
+int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const struct psp_firmware_header_v1_0 *asd_hdr;
+ int err = 0;
+
+ err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_asd.bin", chip_name);
+ if (err)
+ goto out;
+
+ asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
+ adev->psp.asd_context.bin_desc.fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
+ adev->psp.asd_context.bin_desc.feature_version = le32_to_cpu(asd_hdr->sos.fw_version);
+ adev->psp.asd_context.bin_desc.size_bytes = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
+ adev->psp.asd_context.bin_desc.start_addr = (uint8_t *)asd_hdr +
+ le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.asd_fw);
+ return err;
+}
+
+int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const struct psp_firmware_header_v1_0 *toc_hdr;
+ int err = 0;
+
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", chip_name);
+ if (err)
+ goto out;
+
+ toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+ adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+ adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+ adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+ le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.toc_fw);
+ return err;
+}
+
+static int parse_sos_bin_descriptor(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc,
+ const struct psp_firmware_header_v2_0 *sos_hdr)
+{
+ uint8_t *ucode_start_addr = NULL;
+
+ if (!psp || !desc || !sos_hdr)
+ return -EINVAL;
+
+ ucode_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(desc->offset_bytes) +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+
+ switch (desc->fw_type) {
+ case PSP_FW_TYPE_PSP_SOS:
+ psp->sos.fw_version = le32_to_cpu(desc->fw_version);
+ psp->sos.feature_version = le32_to_cpu(desc->fw_version);
+ psp->sos.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->sos.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SYS_DRV:
+ psp->sys.fw_version = le32_to_cpu(desc->fw_version);
+ psp->sys.feature_version = le32_to_cpu(desc->fw_version);
+ psp->sys.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->sys.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_KDB:
+ psp->kdb.fw_version = le32_to_cpu(desc->fw_version);
+ psp->kdb.feature_version = le32_to_cpu(desc->fw_version);
+ psp->kdb.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->kdb.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_TOC:
+ psp->toc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->toc.feature_version = le32_to_cpu(desc->fw_version);
+ psp->toc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->toc.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SPL:
+ psp->spl.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spl.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spl.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spl.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_RL:
+ psp->rl.fw_version = le32_to_cpu(desc->fw_version);
+ psp->rl.feature_version = le32_to_cpu(desc->fw_version);
+ psp->rl.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->rl.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SOC_DRV:
+ psp->soc_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->soc_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->soc_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->soc_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_INTF_DRV:
+ psp->intf_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->intf_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->intf_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->intf_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_DBG_DRV:
+ psp->dbg_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->dbg_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->dbg_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->dbg_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ psp->ras_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ras_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_IPKEYMGR_DRV:
+ psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ipkeymgr_drv.start_addr = ucode_start_addr;
+ break;
+ case PSP_FW_TYPE_PSP_SPDM_DRV:
+ psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spdm_drv.start_addr = ucode_start_addr;
+ break;
+ default:
+ dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
+ break;
+ }
+
+ return 0;
+}
+
+static int psp_init_sos_base_fw(struct amdgpu_device *adev)
+{
+ const struct psp_firmware_header_v1_0 *sos_hdr;
+ const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
+ uint8_t *ucode_array_start_addr;
+
+ sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
+ ucode_array_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2))) {
+ adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
+ adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version);
+
+ adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr->sos.offset_bytes);
+ adev->psp.sys.start_addr = ucode_array_start_addr;
+
+ adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr->sos.size_bytes);
+ adev->psp.sos.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr->sos.offset_bytes);
+ } else {
+ /* Load alternate PSP SOS FW */
+ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
+
+ adev->psp.sos.fw_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version);
+ adev->psp.sos.feature_version = le32_to_cpu(sos_hdr_v1_3->sos_aux.fw_version);
+
+ adev->psp.sys.size_bytes = le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.size_bytes);
+ adev->psp.sys.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->sys_drv_aux.offset_bytes);
+
+ adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes);
+ adev->psp.sos.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes);
+ }
+
+ if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes == 0)) {
+ dev_warn(adev->dev, "PSP SOS FW not available");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const struct psp_firmware_header_v1_0 *sos_hdr;
+ const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
+ const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;
+ const struct psp_firmware_header_v1_3 *sos_hdr_v1_3;
+ const struct psp_firmware_header_v2_0 *sos_hdr_v2_0;
+ const struct psp_firmware_header_v2_1 *sos_hdr_v2_1;
+ int fw_index, fw_bin_count, start_index = 0;
+ const struct psp_fw_bin_desc *fw_bin;
+ uint8_t *ucode_array_start_addr;
+ int err = 0;
+
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos.bin", chip_name);
+ if (err)
+ goto out;
+
+ sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
+ ucode_array_start_addr = (uint8_t *)sos_hdr +
+ le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
+ amdgpu_ucode_print_psp_hdr(&sos_hdr->header);
+
+ switch (sos_hdr->header.header_version_major) {
+ case 1:
+ err = psp_init_sos_base_fw(adev);
+ if (err)
+ goto out;
+
+ if (sos_hdr->header.header_version_minor == 1) {
+ sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data;
+ adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_1->toc.size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_1->toc.offset_bytes);
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_1->kdb.size_bytes);
+ adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_1->kdb.offset_bytes);
+ }
+ if (sos_hdr->header.header_version_minor == 2) {
+ sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data;
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_2->kdb.size_bytes);
+ adev->psp.kdb.start_addr = (uint8_t *)adev->psp.sys.start_addr +
+ le32_to_cpu(sos_hdr_v1_2->kdb.offset_bytes);
+ }
+ if (sos_hdr->header.header_version_minor == 3) {
+ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data;
+ adev->psp.toc.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.toc.size_bytes);
+ adev->psp.toc.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->v1_1.toc.offset_bytes);
+ adev->psp.kdb.size_bytes = le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.size_bytes);
+ adev->psp.kdb.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->v1_1.kdb.offset_bytes);
+ adev->psp.spl.size_bytes = le32_to_cpu(sos_hdr_v1_3->spl.size_bytes);
+ adev->psp.spl.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->spl.offset_bytes);
+ adev->psp.rl.size_bytes = le32_to_cpu(sos_hdr_v1_3->rl.size_bytes);
+ adev->psp.rl.start_addr = ucode_array_start_addr +
+ le32_to_cpu(sos_hdr_v1_3->rl.offset_bytes);
+ }
+ break;
+ case 2:
+ sos_hdr_v2_0 = (const struct psp_firmware_header_v2_0 *)adev->psp.sos_fw->data;
+
+ fw_bin_count = le32_to_cpu(sos_hdr_v2_0->psp_fw_bin_count);
+
+ if (fw_bin_count >= UCODE_MAX_PSP_PACKAGING) {
+ dev_err(adev->dev, "packed SOS count exceeds maximum limit\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (sos_hdr_v2_0->header.header_version_minor == 1) {
+ sos_hdr_v2_1 = (const struct psp_firmware_header_v2_1 *)adev->psp.sos_fw->data;
+
+ fw_bin = sos_hdr_v2_1->psp_fw_bin;
+
+ if (psp_is_aux_sos_load_required(psp))
+ start_index = le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+ else
+ fw_bin_count -= le32_to_cpu(sos_hdr_v2_1->psp_aux_fw_bin_index);
+
+ } else {
+ fw_bin = sos_hdr_v2_0->psp_fw_bin;
+ }
+
+ for (fw_index = start_index; fw_index < fw_bin_count; fw_index++) {
+ err = parse_sos_bin_descriptor(psp, fw_bin + fw_index,
+ sos_hdr_v2_0);
+ if (err)
+ goto out;
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "unsupported psp sos firmware\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.sos_fw);
+
+ return err;
+}
+
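+/* Decide whether a TA bin descriptor applies to this device (e.g. regular vs AUX XGMI TA on MP0 13.0.6) */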
+static bool is_ta_fw_applicable(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t fw_version;
+
+ switch (desc->fw_type) {
+ case TA_FW_TYPE_PSP_XGMI:
+ case TA_FW_TYPE_PSP_XGMI_AUX:
+ /* for now, the AUX TA only exists in the 13.0.6 ta bin,
+ * from v20.00.0x.14
+ */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 6)) {
+ fw_version = le32_to_cpu(desc->fw_version);
+
+ if (adev->flags & AMD_IS_APU &&
+ (fw_version & 0xff) >= 0x14)
+ return desc->fw_type == TA_FW_TYPE_PSP_XGMI_AUX;
+ else
+ return desc->fw_type == TA_FW_TYPE_PSP_XGMI;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return true;
+}
+
+static int parse_ta_bin_descriptor(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc,
+ const struct ta_firmware_header_v2_0 *ta_hdr)
+{
+ uint8_t *ucode_start_addr = NULL;
+
+ if (!psp || !desc || !ta_hdr)
+ return -EINVAL;
+
+ if (!is_ta_fw_applicable(psp, desc))
+ return 0;
+
+ ucode_start_addr = (uint8_t *)ta_hdr +
+ le32_to_cpu(desc->offset_bytes) +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ switch (desc->fw_type) {
+ case TA_FW_TYPE_PSP_ASD:
+ psp->asd_context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->asd_context.bin_desc.feature_version = le32_to_cpu(desc->fw_version);
+ psp->asd_context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->asd_context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_XGMI:
+ case TA_FW_TYPE_PSP_XGMI_AUX:
+ psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_RAS:
+ psp->ras_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ras_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ras_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_HDCP:
+ psp->hdcp_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->hdcp_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->hdcp_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_DTM:
+ psp->dtm_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->dtm_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->dtm_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_RAP:
+ psp->rap_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
+ psp->rap_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->rap_context.context.bin_desc.start_addr = ucode_start_addr;
+ break;
+ case TA_FW_TYPE_PSP_SECUREDISPLAY:
+ psp->securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(desc->fw_version);
+ psp->securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(desc->size_bytes);
+ psp->securedisplay_context.context.bin_desc.start_addr =
+ ucode_start_addr;
+ break;
+ default:
+ dev_warn(psp->adev->dev, "Unsupported TA type: %d\n", desc->fw_type);
+ break;
+ }
+
+ return 0;
+}
+
+static int parse_ta_v1_microcode(struct psp_context *psp)
+{
+ const struct ta_firmware_header_v1_0 *ta_hdr;
+ struct amdgpu_device *adev = psp->adev;
+
+ ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data;
+
+ if (le16_to_cpu(ta_hdr->header.header_version_major) != 1)
+ return -EINVAL;
+
+ adev->psp.xgmi_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->xgmi.fw_version);
+ adev->psp.xgmi_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->xgmi.size_bytes);
+ adev->psp.xgmi_context.context.bin_desc.start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.ras_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->ras.fw_version);
+ adev->psp.ras_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->ras.size_bytes);
+ adev->psp.ras_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.xgmi_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->ras.offset_bytes);
+
+ adev->psp.hdcp_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->hdcp.fw_version);
+ adev->psp.hdcp_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->hdcp.size_bytes);
+ adev->psp.hdcp_context.context.bin_desc.start_addr =
+ (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+
+ adev->psp.dtm_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->dtm.fw_version);
+ adev->psp.dtm_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->dtm.size_bytes);
+ adev->psp.dtm_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+ adev->psp.securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->securedisplay.fw_version);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+ adev->psp.securedisplay_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+
+ return 0;
+}
+
+static int parse_ta_v2_microcode(struct psp_context *psp)
+{
+ const struct ta_firmware_header_v2_0 *ta_hdr;
+ struct amdgpu_device *adev = psp->adev;
+ int err = 0;
+ int ta_index = 0;
+
+ ta_hdr = (const struct ta_firmware_header_v2_0 *)adev->psp.ta_fw->data;
+
+ if (le16_to_cpu(ta_hdr->header.header_version_major) != 2)
+ return -EINVAL;
+
+ if (le32_to_cpu(ta_hdr->ta_fw_bin_count) >= UCODE_MAX_PSP_PACKAGING) {
+ dev_err(adev->dev, "packed TA count exceeds maximum limit\n");
+ return -EINVAL;
+ }
+
+ for (ta_index = 0; ta_index < le32_to_cpu(ta_hdr->ta_fw_bin_count); ta_index++) {
+ err = parse_ta_bin_descriptor(psp,
+ &ta_hdr->ta_fw_bin[ta_index],
+ ta_hdr);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
+{
+ const struct common_firmware_header *hdr;
+ struct amdgpu_device *adev = psp->adev;
+ int err;
+
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta.bin", chip_name);
+ if (err)
+ return err;
+
+ hdr = (const struct common_firmware_header *)adev->psp.ta_fw->data;
+ switch (le16_to_cpu(hdr->header_version_major)) {
+ case 1:
+ err = parse_ta_v1_microcode(psp);
+ break;
+ case 2:
+ err = parse_ta_v2_microcode(psp);
+ break;
+ default:
+ dev_err(adev->dev, "unsupported TA header version\n");
+ err = -EINVAL;
+ }
+
+ if (err)
+ amdgpu_ucode_release(&adev->psp.ta_fw);
+
+ return err;
+}
+
+int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const struct psp_firmware_header_v1_0 *cap_hdr_v1_0;
+ struct amdgpu_firmware_info *info = NULL;
+ int err = 0;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ dev_err(adev->dev, "cap microcode should only be loaded under SRIOV\n");
+ return -EINVAL;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_cap.bin", chip_name);
+ if (err) {
+ if (err == -ENODEV) {
+ dev_warn(adev->dev, "cap microcode does not exist, skip\n");
+ err = 0;
+ } else {
+ dev_err(adev->dev, "fail to initialize cap microcode\n");
+ }
+ goto out;
+ }
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP];
+ info->ucode_id = AMDGPU_UCODE_ID_CAP;
+ info->fw = adev->psp.cap_fw;
+ cap_hdr_v1_0 = (const struct psp_firmware_header_v1_0 *)
+ adev->psp.cap_fw->data;
+ adev->firmware.fw_size += ALIGN(
+ le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes), PAGE_SIZE);
+ adev->psp.cap_fw_version = le32_to_cpu(cap_hdr_v1_0->header.ucode_version);
+ adev->psp.cap_feature_version = le32_to_cpu(cap_hdr_v1_0->sos.fw_version);
+ adev->psp.cap_ucode_size = le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes);
+
+ return 0;
+
+out:
+ amdgpu_ucode_release(&adev->psp.cap_fw);
+ return err;
+}
+
+int psp_config_sq_perfmon(struct psp_context *psp,
+ uint32_t xcp_id, bool core_override_enable,
+ bool reg_override_enable, bool perfmon_override_enable)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (xcp_id > MAX_XCP) {
+ dev_err(psp->adev->dev, "invalid xcp_id %d\n", xcp_id);
+ return -EINVAL;
+ }
+
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) {
+ dev_err(psp->adev->dev, "Unsupported MP0 version 0x%x for CONFIG_SQ_PERFMON command\n",
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0));
+ return -EINVAL;
+ }
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_CONFIG_SQ_PERFMON;
+ cmd->cmd.config_sq_perfmon.gfx_xcp_mask = BIT_MASK(xcp_id);
+ cmd->cmd.config_sq_perfmon.core_override = core_override_enable;
+ cmd->cmd.config_sq_perfmon.reg_override = reg_override_enable;
+ cmd->cmd.config_sq_perfmon.perfmon_override = perfmon_override_enable;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_warn(psp->adev->dev, "PSP failed to config sq: xcp%d core%d reg%d perfmon%d\n",
+ xcp_id, core_override_enable, reg_override_enable, perfmon_override_enable);
+
+ release_psp_cmd_buf(psp);
+ return ret;
+}
+
+static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_ip_block *ip_block;
+ uint32_t fw_ver;
+ int ret;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
+ dev_info(adev->dev, "PSP block is not ready yet\n.");
+ return -EBUSY;
+ }
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_read_usbc_pd_fw(&adev->psp, &fw_ver);
+ mutex_unlock(&adev->psp.mutex);
+
+ if (ret) {
+ dev_err(adev->dev, "Failed to read USBC PD FW, err = %d\n", ret);
+ return ret;
+ }
+
+ return sysfs_emit(buf, "%x\n", fw_ver);
+}
+
+static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int ret, idx;
+ const struct firmware *usbc_pd_fw;
+ struct amdgpu_bo *fw_buf_bo = NULL;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_cpu_addr;
+ struct amdgpu_ip_block *ip_block;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
+ dev_err(adev->dev, "PSP block is not ready yet.");
+ return -EBUSY;
+ }
+
+ if (!drm_dev_enter(ddev, &idx))
+ return -ENODEV;
+
+ ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s", buf);
+ if (ret)
+ goto fail;
+
+ /* LFB address which is aligned to 1MB boundary per PSP request */
+ ret = amdgpu_bo_create_kernel(adev, usbc_pd_fw->size, 0x100000,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &fw_buf_bo, &fw_pri_mc_addr,
+ &fw_pri_cpu_addr);
+ if (ret)
+ goto rel_buf;
+
+ memcpy_toio(fw_pri_cpu_addr, usbc_pd_fw->data, usbc_pd_fw->size);
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_load_usbc_pd_fw(&adev->psp, fw_pri_mc_addr);
+ mutex_unlock(&adev->psp.mutex);
+
+ amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
+
+rel_buf:
+ amdgpu_ucode_release(&usbc_pd_fw);
+fail:
+ if (ret) {
+ dev_err(adev->dev, "Failed to load USBC PD FW, err = %d", ret);
+ count = ret;
+ }
+
+ drm_dev_exit(idx);
+ return count;
+}
+
+void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size)
+{
+ int idx;
+
+ if (!drm_dev_enter(adev_to_drm(psp->adev), &idx))
+ return;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, start_addr, bin_size);
+
+ drm_dev_exit(idx);
+}
+
+/**
+ * DOC: usbc_pd_fw
+ * Reading from this file will retrieve the USB-C PD firmware version. Writing to
+ * this file will trigger the update process.
+ */
+static DEVICE_ATTR(usbc_pd_fw, 0644,
+ psp_usbc_pd_fw_sysfs_read,
+ psp_usbc_pd_fw_sysfs_write);
+
+int is_psp_fw_valid(struct psp_bin_desc bin)
+{
+ return bin.size_bytes;
+}
+
+static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
+ char *buffer, loff_t pos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ adev->psp.vbflash_done = false;
+
+ /* Safeguard against memory drain */
+ if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) {
+ dev_err(adev->dev, "File size cannot exceed %u\n", AMD_VBIOS_FILE_MAX_SIZE_B);
+ kvfree(adev->psp.vbflash_tmp_buf);
+ adev->psp.vbflash_tmp_buf = NULL;
+ adev->psp.vbflash_image_size = 0;
+ return -ENOMEM;
+ }
+
+ /* TODO Just allocate max for now and optimize to realloc later if needed */
+ if (!adev->psp.vbflash_tmp_buf) {
+ adev->psp.vbflash_tmp_buf = kvmalloc(AMD_VBIOS_FILE_MAX_SIZE_B, GFP_KERNEL);
+ if (!adev->psp.vbflash_tmp_buf)
+ return -ENOMEM;
+ }
+
+ mutex_lock(&adev->psp.mutex);
+ memcpy(adev->psp.vbflash_tmp_buf + pos, buffer, count);
+ adev->psp.vbflash_image_size += count;
+ mutex_unlock(&adev->psp.mutex);
+
+ dev_dbg(adev->dev, "IFWI staged for update\n");
+
+ return count;
+}
+
+static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr, char *buffer,
+ loff_t pos, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_bo *fw_buf_bo = NULL;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_cpu_addr;
+ int ret;
+
+ if (adev->psp.vbflash_image_size == 0)
+ return -EINVAL;
+
+ dev_dbg(adev->dev, "PSP IFWI flash process initiated\n");
+
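+ /* Stage the IFWI image in VRAM and hand its GPU address to PSP for flashing */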
+ ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &fw_buf_bo,
+ &fw_pri_mc_addr,
+ &fw_pri_cpu_addr);
+ if (ret)
+ goto rel_buf;
+
+ memcpy_toio(fw_pri_cpu_addr, adev->psp.vbflash_tmp_buf, adev->psp.vbflash_image_size);
+
+ mutex_lock(&adev->psp.mutex);
+ ret = psp_update_spirom(&adev->psp, fw_pri_mc_addr);
+ mutex_unlock(&adev->psp.mutex);
+
+ amdgpu_bo_free_kernel(&fw_buf_bo, &fw_pri_mc_addr, &fw_pri_cpu_addr);
+
+rel_buf:
+ kvfree(adev->psp.vbflash_tmp_buf);
+ adev->psp.vbflash_tmp_buf = NULL;
+ adev->psp.vbflash_image_size = 0;
+
+ if (ret) {
+ dev_err(adev->dev, "Failed to load IFWI, err = %d\n", ret);
+ return ret;
+ }
+
+ dev_dbg(adev->dev, "PSP IFWI flash process done\n");
+ return 0;
+}
+
+/**
+ * DOC: psp_vbflash
+ * Writing to this file will stage an IFWI for update. Reading from this file
+ * will trigger the update process.
+ */
+static const struct bin_attribute psp_vbflash_bin_attr = {
+ .attr = {.name = "psp_vbflash", .mode = 0660},
+ .size = 0,
+ .write = amdgpu_psp_vbflash_write,
+ .read = amdgpu_psp_vbflash_read,
+};
+
+/**
+ * DOC: psp_vbflash_status
+ * The status of the flash process.
+ * 0: IFWI flash not complete.
+ * 1: IFWI flash complete.
+ */
+static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint32_t vbflash_status;
+
+ vbflash_status = psp_vbflash_status(&adev->psp);
+ if (!adev->psp.vbflash_done)
+ vbflash_status = 0;
+ else if (adev->psp.vbflash_done && !(vbflash_status & 0x80000000))
+ vbflash_status = 1;
+
+ return sysfs_emit(buf, "0x%x\n", vbflash_status);
+}
+static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
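+
+/*
+ * Example usage of the two files above (a sketch; the sysfs path is
+ * illustrative and depends on the PCI address of the GPU):
+ *
+ *   # stage the IFWI image
+ *   cat ifwi.bin > /sys/bus/pci/devices/0000:03:00.0/psp_vbflash
+ *   # reading the same file triggers the flash process
+ *   cat /sys/bus/pci/devices/0000:03:00.0/psp_vbflash > /dev/null
+ *   # poll the status file until it reports 0x1 (flash complete)
+ *   cat /sys/bus/pci/devices/0000:03:00.0/psp_vbflash_status
+ */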
+
+static const struct bin_attribute *const bin_flash_attrs[] = {
+ &psp_vbflash_bin_attr,
+ NULL
+};
+
+static struct attribute *flash_attrs[] = {
+ &dev_attr_psp_vbflash_status.attr,
+ &dev_attr_usbc_pd_fw.attr,
+ NULL
+};
+
+static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (attr == &dev_attr_usbc_pd_fw.attr)
+ return adev->psp.sup_pd_fw_up ? 0660 : 0;
+
+ return adev->psp.sup_ifwi_up ? 0440 : 0;
+}
+
+static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj,
+ const struct bin_attribute *attr,
+ int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return adev->psp.sup_ifwi_up ? 0660 : 0;
+}
+
+const struct attribute_group amdgpu_flash_attr_group = {
+ .attrs = flash_attrs,
+ .bin_attrs = bin_flash_attrs,
+ .is_bin_visible = amdgpu_bin_flash_attr_is_visible,
+ .is_visible = amdgpu_flash_attr_is_visible,
+};
+
+#if defined(CONFIG_DEBUG_FS)
+static int psp_read_spirom_debugfs_open(struct inode *inode, struct file *filp)
+{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet;
+ int ret;
+
+ /* serialize open() calls on this file */
+ if (!mutex_trylock(&adev->psp.mutex))
+ return -EBUSY;
+
+ /*
+ * Make sure only one userspace process is dumping at a time, so that
+ * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE_B * 2 is consumed;
+ * e.g. one process may try to open the file while another has already
+ * proceeded to read or release. This also removes the need for the
+ * mutex in the read() and release() callbacks.
+ */
+ if (adev->psp.spirom_dump_trip) {
+ mutex_unlock(&adev->psp.mutex);
+ return -EBUSY;
+ }
+
+ bo_triplet = kzalloc(sizeof(struct spirom_bo), GFP_KERNEL);
+ if (!bo_triplet) {
+ mutex_unlock(&adev->psp.mutex);
+ return -ENOMEM;
+ }
+
+ ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo_triplet->bo,
+ &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+ if (ret)
+ goto rel_trip;
+
+ ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr);
+ if (ret)
+ goto rel_bo;
+
+ adev->psp.spirom_dump_trip = bo_triplet;
+ mutex_unlock(&adev->psp.mutex);
+ return 0;
+rel_bo:
+ amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+rel_trip:
+ kfree(bo_triplet);
+ mutex_unlock(&adev->psp.mutex);
+ dev_err(adev->dev, "Trying IFWI dump fails, err = %d\n", ret);
+ return ret;
+}
+
+static ssize_t psp_read_spirom_debugfs_read(struct file *filp, char __user *buf, size_t size,
+ loff_t *pos)
+{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip;
+
+ if (!bo_triplet)
+ return -EINVAL;
+
+ return simple_read_from_buffer(buf,
+ size,
+ pos, bo_triplet->cpu_addr,
+ AMD_VBIOS_FILE_MAX_SIZE_B * 2);
+}
+
+static int psp_read_spirom_debugfs_release(struct inode *inode, struct file *filp)
+{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip;
+
+ if (bo_triplet) {
+ amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+ kfree(bo_triplet);
+ }
+
+ adev->psp.spirom_dump_trip = NULL;
+ return 0;
+}
+
+static const struct file_operations psp_dump_spirom_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .open = psp_read_spirom_debugfs_open,
+ .read = psp_read_spirom_debugfs_read,
+ .release = psp_read_spirom_debugfs_release,
+ .llseek = default_llseek,
+};
+#endif
+
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ debugfs_create_file_size("psp_spirom_dump", 0444, minor->debugfs_root,
+ adev, &psp_dump_spirom_debugfs_ops, AMD_VBIOS_FILE_MAX_SIZE_B * 2);
+#endif
+}
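+
+/*
+ * Example usage of the debugfs file created above (a sketch; the dri
+ * instance number is illustrative):
+ *
+ *   cat /sys/kernel/debug/dri/0/psp_spirom_dump > spirom_dump.bin
+ *
+ * Only one process may have the file open at a time; a concurrent
+ * open() returns -EBUSY until the first reader releases it.
+ */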
+
+const struct amd_ip_funcs psp_ip_funcs = {
+ .name = "psp",
+ .early_init = psp_early_init,
+ .sw_init = psp_sw_init,
+ .sw_fini = psp_sw_fini,
+ .hw_init = psp_hw_init,
+ .hw_fini = psp_hw_fini,
+ .suspend = psp_suspend,
+ .resume = psp_resume,
+ .set_clockgating_state = psp_set_clockgating_state,
+ .set_powergating_state = psp_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version psp_v3_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 3,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v10_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v11_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v11_0_8_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 11,
+ .minor = 0,
+ .rev = 8,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v13_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 13,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 13,
+ .minor = 0,
+ .rev = 4,
+ .funcs = &psp_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version psp_v14_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 14,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
new file mode 100644
index 000000000000..237b624aa51c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -0,0 +1,626 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+#ifndef __AMDGPU_PSP_H__
+#define __AMDGPU_PSP_H__
+
+#include "amdgpu.h"
+#include "psp_gfx_if.h"
+#include "ta_xgmi_if.h"
+#include "ta_ras_if.h"
+#include "ta_rap_if.h"
+#include "ta_secureDisplay_if.h"
+
+#define PSP_FENCE_BUFFER_SIZE 0x1000
+#define PSP_CMD_BUFFER_SIZE 0x1000
+#define PSP_1_MEG 0x100000
+#define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 0x800000 : 0x400000)
+#define PSP_TMR_ALIGNMENT 0x100000
+#define PSP_FW_NAME_LEN 0x24
+
+/* VBIOS gfl defines */
+#define MBOX_READY_MASK 0x80000000
+#define MBOX_STATUS_MASK 0x0000FFFF
+#define MBOX_COMMAND_MASK 0x00FF0000
+#define MBOX_READY_FLAG 0x80000000
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO 0xf
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10
+#define C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11
+
+/* Command register bit 31 set to indicate readiness */
+#define MBOX_TOS_READY_FLAG (GFX_FLAG_RESPONSE)
+#define MBOX_TOS_READY_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK)
+
+/* Values to check for a successful GFX_CMD response wait. Check against
+ * both the status bits and the response state - this helps to detect a
+ * command failure or other unexpected cases, such as a dropped device
+ * reading back all 0xFFs.
+ */
+#define MBOX_TOS_RESP_FLAG (GFX_FLAG_RESPONSE)
+#define MBOX_TOS_RESP_MASK (GFX_CMD_RESPONSE_MASK | GFX_CMD_STATUS_MASK)
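+
+/*
+ * A typical wait on a GFX_CMD response would combine these (a sketch;
+ * the C2PMSG_64 register offset is illustrative and depends on the PSP
+ * IP version):
+ *
+ *   ret = psp_wait_for(psp,
+ *                      SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ *                      MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0);
+ */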
+
+extern const struct attribute_group amdgpu_flash_attr_group;
+
+enum psp_shared_mem_size {
+ PSP_ASD_SHARED_MEM_SIZE = 0x0,
+ PSP_XGMI_SHARED_MEM_SIZE = 0x4000,
+ PSP_RAS_SHARED_MEM_SIZE = 0x4000,
+ PSP_HDCP_SHARED_MEM_SIZE = 0x4000,
+ PSP_DTM_SHARED_MEM_SIZE = 0x4000,
+ PSP_RAP_SHARED_MEM_SIZE = 0x4000,
+ PSP_SECUREDISPLAY_SHARED_MEM_SIZE = 0x4000,
+};
+
+enum ta_type_id {
+ TA_TYPE_XGMI = 1,
+ TA_TYPE_RAS,
+ TA_TYPE_HDCP,
+ TA_TYPE_DTM,
+ TA_TYPE_RAP,
+ TA_TYPE_SECUREDISPLAY,
+
+ TA_TYPE_MAX_INDEX,
+};
+
+struct psp_context;
+struct psp_xgmi_node_info;
+struct psp_xgmi_topology_info;
+struct psp_bin_desc;
+
+enum psp_bootloader_cmd {
+ PSP_BL__LOAD_SYSDRV = 0x10000,
+ PSP_BL__LOAD_SOSDRV = 0x20000,
+ PSP_BL__LOAD_KEY_DATABASE = 0x80000,
+ PSP_BL__LOAD_SOCDRV = 0xB0000,
+ PSP_BL__LOAD_DBGDRV = 0xC0000,
+ PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV,
+ PSP_BL__LOAD_INTFDRV = 0xD0000,
+ PSP_BL__LOAD_RASDRV = 0xE0000,
+ PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000,
+ PSP_BL__DRAM_LONG_TRAIN = 0x100000,
+ PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
+ PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
+ PSP_BL__LOAD_SPDMDRV = 0x20000000,
+};
+
+enum psp_ring_type {
+ PSP_RING_TYPE__INVALID = 0,
+ /*
+ * These values map to the way the PSP kernel identifies the
+ * rings.
+ */
+ PSP_RING_TYPE__UM = 1, /* User mode ring (formerly called RBI) */
+ PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */
+};
+
+struct psp_ring {
+ enum psp_ring_type ring_type;
+ struct psp_gfx_rb_frame *ring_mem;
+ uint64_t ring_mem_mc_addr;
+ void *ring_mem_handle;
+ uint32_t ring_size;
+ uint32_t ring_wptr;
+};
+
+/* More registers may be supported */
+enum psp_reg_prog_id {
+ PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
+ PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
+ PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
+ PSP_REG_MMHUB_L1_TLB_CNTL = 25,
+ PSP_REG_LAST
+};
+
+#define PSP_WAITREG_CHANGED BIT(0) /* check if the value has changed */
+#define PSP_WAITREG_NOVERBOSE BIT(1) /* No error verbose */
+
+struct psp_funcs {
+ int (*init_microcode)(struct psp_context *psp);
+ int (*wait_for_bootloader)(struct psp_context *psp);
+ int (*bootloader_load_kdb)(struct psp_context *psp);
+ int (*bootloader_load_spl)(struct psp_context *psp);
+ int (*bootloader_load_sysdrv)(struct psp_context *psp);
+ int (*bootloader_load_soc_drv)(struct psp_context *psp);
+ int (*bootloader_load_intf_drv)(struct psp_context *psp);
+ int (*bootloader_load_dbg_drv)(struct psp_context *psp);
+ int (*bootloader_load_ras_drv)(struct psp_context *psp);
+ int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp);
+ int (*bootloader_load_spdm_drv)(struct psp_context *psp);
+ int (*bootloader_load_sos)(struct psp_context *psp);
+ int (*ring_create)(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+ int (*ring_stop)(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+ int (*ring_destroy)(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+ bool (*smu_reload_quirk)(struct psp_context *psp);
+ int (*mode1_reset)(struct psp_context *psp);
+ int (*mem_training)(struct psp_context *psp, uint32_t ops);
+ uint32_t (*ring_get_wptr)(struct psp_context *psp);
+ void (*ring_set_wptr)(struct psp_context *psp, uint32_t value);
+ int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver);
+ int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*dump_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*vbflash_stat)(struct psp_context *psp);
+ int (*fatal_error_recovery_quirk)(struct psp_context *psp);
+ bool (*get_ras_capability)(struct psp_context *psp);
+ bool (*is_aux_sos_load_required)(struct psp_context *psp);
+ bool (*is_reload_needed)(struct psp_context *psp);
+ int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
+};
+
+struct ta_funcs {
+ int (*fn_ta_initialize)(struct psp_context *psp);
+ int (*fn_ta_invoke)(struct psp_context *psp, uint32_t ta_cmd_id);
+ int (*fn_ta_terminate)(struct psp_context *psp);
+};
+
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
+struct psp_xgmi_node_info {
+ uint64_t node_id;
+ uint8_t num_hops;
+ uint8_t is_sharing_enabled;
+ enum ta_xgmi_assigned_sdma_engine sdma_engine;
+ uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
+};
+
+struct psp_xgmi_topology_info {
+ uint32_t num_nodes;
+ struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
+};
+
+struct psp_bin_desc {
+ uint32_t fw_version;
+ uint32_t feature_version;
+ uint32_t size_bytes;
+ uint8_t *start_addr;
+};
+
+struct ta_mem_context {
+ struct amdgpu_bo *shared_bo;
+ uint64_t shared_mc_addr;
+ void *shared_buf;
+ enum psp_shared_mem_size shared_mem_size;
+};
+
+struct ta_context {
+ bool initialized;
+ uint32_t session_id;
+ uint32_t resp_status;
+ struct ta_mem_context mem_context;
+ struct psp_bin_desc bin_desc;
+ enum psp_gfx_cmd_id ta_load_type;
+ enum ta_type_id ta_type;
+};
+
+struct ta_cp_context {
+ struct ta_context context;
+ struct mutex mutex;
+};
+
+struct psp_xgmi_context {
+ struct ta_context context;
+ struct psp_xgmi_topology_info top_info;
+ bool supports_extended_data;
+ uint8_t xgmi_ta_caps;
+};
+
+struct psp_ras_context {
+ struct ta_context context;
+ struct amdgpu_ras *ras;
+ struct mutex mutex;
+};
+
+#define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942
+#define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000
+#define GDDR6_MEM_TRAINING_OFFSET 0x8000
+/* Define the VRAM size that will be encroached by BIST training. */
+#define BIST_MEM_TRAINING_ENCROACHED_SIZE 0x2000000
+
+enum psp_memory_training_init_flag {
+ PSP_MEM_TRAIN_NOT_SUPPORT = 0x0,
+ PSP_MEM_TRAIN_SUPPORT = 0x1,
+ PSP_MEM_TRAIN_INIT_FAILED = 0x2,
+ PSP_MEM_TRAIN_RESERVE_SUCCESS = 0x4,
+ PSP_MEM_TRAIN_INIT_SUCCESS = 0x8,
+};
+
+enum psp_memory_training_ops {
+ PSP_MEM_TRAIN_SEND_LONG_MSG = 0x1,
+ PSP_MEM_TRAIN_SAVE = 0x2,
+ PSP_MEM_TRAIN_RESTORE = 0x4,
+ PSP_MEM_TRAIN_SEND_SHORT_MSG = 0x8,
+ PSP_MEM_TRAIN_COLD_BOOT = PSP_MEM_TRAIN_SEND_LONG_MSG,
+ PSP_MEM_TRAIN_RESUME = PSP_MEM_TRAIN_SEND_SHORT_MSG,
+};
+
+struct psp_memory_training_context {
+ /* training data size */
+ u64 train_data_size;
+ /*
+ * sys_cache:
+ * CPU virtual address of the system memory buffer that is used to
+ * store the training data.
+ */
+ void *sys_cache;
+
+ /* vram offset of the p2c training data */
+ u64 p2c_train_data_offset;
+
+ /* vram offset of the c2p training data */
+ u64 c2p_train_data_offset;
+ struct amdgpu_bo *c2p_bo;
+
+ enum psp_memory_training_init_flag init;
+ u32 training_cnt;
+ bool enable_mem_training;
+};
+
+/** PSP runtime DB **/
+#define PSP_RUNTIME_DB_SIZE_IN_BYTES 0x10000
+#define PSP_RUNTIME_DB_OFFSET 0x100000
+#define PSP_RUNTIME_DB_COOKIE_ID 0x0ed5
+#define PSP_RUNTIME_DB_VER_1 0x0100
+#define PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT 0x40
+
+enum psp_runtime_entry_type {
+ PSP_RUNTIME_ENTRY_TYPE_INVALID = 0x0,
+ PSP_RUNTIME_ENTRY_TYPE_TEST = 0x1,
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_COMMON = 0x2, /* Common mGPU runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_WAFL = 0x3, /* WAFL runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_MGPU_XGMI = 0x4, /* XGMI runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG = 0x5, /* Boot Config runtime data */
+ PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS = 0x6, /* SCPM validation data */
+};
+
+/* PSP runtime DB header */
+struct psp_runtime_data_header {
+ /* determine the existence of runtime db */
+ uint16_t cookie;
+ /* version of runtime db */
+ uint16_t version;
+};
+
+/* PSP runtime DB entry */
+struct psp_runtime_entry {
+ /* type of runtime db entry */
+ uint32_t entry_type;
+ /* offset of entry in bytes */
+ uint16_t offset;
+ /* size of entry in bytes */
+ uint16_t size;
+};
+
+/* PSP runtime DB directory */
+struct psp_runtime_data_directory {
+ /* number of valid entries */
+ uint16_t entry_count;
+ /* db entries */
+ struct psp_runtime_entry entry_list[PSP_RUNTIME_DB_DIAG_ENTRY_MAX_COUNT];
+};
+
+/* PSP runtime DB boot config feature bitmask */
+enum psp_runtime_boot_cfg_feature {
+ BOOT_CFG_FEATURE_GECC = 0x1,
+ BOOT_CFG_FEATURE_TWO_STAGE_DRAM_TRAINING = 0x2,
+};
+
+/* PSP run time DB SCPM authentication defines */
+enum psp_runtime_scpm_authentication {
+ SCPM_DISABLE = 0x0,
+ SCPM_ENABLE = 0x1,
+ SCPM_ENABLE_WITH_SCPM_ERR = 0x2,
+};
+
+/* PSP runtime DB boot config entry */
+struct psp_runtime_boot_cfg_entry {
+ uint32_t boot_cfg_bitmask;
+ uint32_t reserved;
+};
+
+/* PSP runtime DB SCPM entry */
+struct psp_runtime_scpm_entry {
+ enum psp_runtime_scpm_authentication scpm_status;
+};
+
+#if defined(CONFIG_DEBUG_FS)
+struct spirom_bo {
+ struct amdgpu_bo *bo;
+ uint64_t mc_addr;
+ void *cpu_addr;
+};
+#endif
+
+struct psp_context {
+ struct amdgpu_device *adev;
+ struct psp_ring km_ring;
+ struct psp_gfx_cmd_resp *cmd;
+
+ const struct psp_funcs *funcs;
+ const struct ta_funcs *ta_funcs;
+
+ /* firmware buffer */
+ struct amdgpu_bo *fw_pri_bo;
+ uint64_t fw_pri_mc_addr;
+ void *fw_pri_buf;
+
+ /* sos firmware */
+ const struct firmware *sos_fw;
+ struct psp_bin_desc sys;
+ struct psp_bin_desc sos;
+ struct psp_bin_desc toc;
+ struct psp_bin_desc kdb;
+ struct psp_bin_desc spl;
+ struct psp_bin_desc rl;
+ struct psp_bin_desc soc_drv;
+ struct psp_bin_desc intf_drv;
+ struct psp_bin_desc dbg_drv;
+ struct psp_bin_desc ras_drv;
+ struct psp_bin_desc ipkeymgr_drv;
+ struct psp_bin_desc spdm_drv;
+
+ /* tmr buffer */
+ struct amdgpu_bo *tmr_bo;
+ uint64_t tmr_mc_addr;
+
+ /* asd firmware */
+ const struct firmware *asd_fw;
+
+ /* toc firmware */
+ const struct firmware *toc_fw;
+
+ /* cap firmware */
+ const struct firmware *cap_fw;
+
+ /* fence buffer */
+ struct amdgpu_bo *fence_buf_bo;
+ uint64_t fence_buf_mc_addr;
+ void *fence_buf;
+
+ /* cmd buffer */
+ struct amdgpu_bo *cmd_buf_bo;
+ uint64_t cmd_buf_mc_addr;
+ struct psp_gfx_cmd_resp *cmd_buf_mem;
+
+ /* fence value associated with cmd buffer */
+ atomic_t fence_value;
+ /* flag to mark whether gfx fw autoload is supported or not */
+ bool autoload_supported;
+ /* flag to mark whether psp use runtime TMR or boottime TMR */
+ bool boot_time_tmr;
+ /* flag to mark whether df cstate management centralized to PMFW */
+ bool pmfw_centralized_cstate_management;
+
+ /* xgmi ta firmware and buffer */
+ const struct firmware *ta_fw;
+ uint32_t ta_fw_version;
+
+ uint32_t cap_fw_version;
+ uint32_t cap_feature_version;
+ uint32_t cap_ucode_size;
+
+ struct ta_context asd_context;
+ struct psp_xgmi_context xgmi_context;
+ struct psp_ras_context ras_context;
+ struct ta_cp_context hdcp_context;
+ struct ta_cp_context dtm_context;
+ struct ta_cp_context rap_context;
+ struct ta_cp_context securedisplay_context;
+ struct mutex mutex;
+ struct psp_memory_training_context mem_train_ctx;
+
+ uint32_t boot_cfg_bitmask;
+
+ /* firmware upgrades supported */
+ bool sup_pd_fw_up;
+ bool sup_ifwi_up;
+
+ char *vbflash_tmp_buf;
+ size_t vbflash_image_size;
+ bool vbflash_done;
+#if defined(CONFIG_DEBUG_FS)
+ struct spirom_bo *spirom_dump_trip;
+#endif
+};
+
+struct amdgpu_psp_funcs {
+ bool (*check_fw_loading_status)(struct amdgpu_device *adev,
+ enum AMDGPU_UCODE_ID);
+};
+
+
+#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
+#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
+#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
+#define psp_init_microcode(psp) \
+ ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
+#define psp_bootloader_load_kdb(psp) \
+ ((psp)->funcs->bootloader_load_kdb ? (psp)->funcs->bootloader_load_kdb((psp)) : 0)
+#define psp_bootloader_load_spl(psp) \
+ ((psp)->funcs->bootloader_load_spl ? (psp)->funcs->bootloader_load_spl((psp)) : 0)
+#define psp_bootloader_load_sysdrv(psp) \
+ ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
+#define psp_bootloader_load_soc_drv(psp) \
+ ((psp)->funcs->bootloader_load_soc_drv ? (psp)->funcs->bootloader_load_soc_drv((psp)) : 0)
+#define psp_bootloader_load_intf_drv(psp) \
+ ((psp)->funcs->bootloader_load_intf_drv ? (psp)->funcs->bootloader_load_intf_drv((psp)) : 0)
+#define psp_bootloader_load_dbg_drv(psp) \
+ ((psp)->funcs->bootloader_load_dbg_drv ? (psp)->funcs->bootloader_load_dbg_drv((psp)) : 0)
+#define psp_bootloader_load_ras_drv(psp) \
+ ((psp)->funcs->bootloader_load_ras_drv ? \
+ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0)
+#define psp_bootloader_load_ipkeymgr_drv(psp) \
+ ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \
+ (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0)
+#define psp_bootloader_load_spdm_drv(psp) \
+ ((psp)->funcs->bootloader_load_spdm_drv ? \
+ (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0)
+#define psp_bootloader_load_sos(psp) \
+ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
+#define psp_smu_reload_quirk(psp) \
+ ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
+#define psp_mode1_reset(psp) \
+ ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
+#define psp_mem_training(psp, ops) \
+ ((psp)->funcs->mem_training ? (psp)->funcs->mem_training((psp), (ops)) : 0)
+
+#define psp_ring_get_wptr(psp) (psp)->funcs->ring_get_wptr((psp))
+#define psp_ring_set_wptr(psp, value) (psp)->funcs->ring_set_wptr((psp), (value))
+
+#define psp_load_usbc_pd_fw(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->load_usbc_pd_fw ? \
+ (psp)->funcs->load_usbc_pd_fw((psp), (fw_pri_mc_addr)) : -EINVAL)
+
+#define psp_read_usbc_pd_fw(psp, fw_ver) \
+ ((psp)->funcs->read_usbc_pd_fw ? \
+ (psp)->funcs->read_usbc_pd_fw((psp), fw_ver) : -EINVAL)
+
+#define psp_update_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->update_spirom ? \
+ (psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
+#define psp_dump_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->dump_spirom ? \
+ (psp)->funcs->dump_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
+#define psp_vbflash_status(psp) \
+ ((psp)->funcs->vbflash_stat ? \
+ (psp)->funcs->vbflash_stat((psp)) : -EINVAL)
+
+#define psp_fatal_error_recovery_quirk(psp) \
+ ((psp)->funcs->fatal_error_recovery_quirk ? \
+ (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0)
+
+#define psp_is_aux_sos_load_required(psp) \
+ ((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0)
+
+#define psp_reg_program_no_ring(psp, val, id) \
+ ((psp)->funcs->reg_program_no_ring ? \
+ (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL)
+
+extern const struct amd_ip_funcs psp_ip_funcs;
+
+extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
+extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block;
+extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v13_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block;
+extern const struct amdgpu_ip_block_version psp_v14_0_ip_block;
+
+int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t flags);
+extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index,
+ uint32_t field_val, uint32_t mask, uint32_t msec_timeout);
+
+int psp_execute_ip_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode);
+
+int psp_gpu_reset(struct amdgpu_device *adev);
+
+int psp_ta_init_shared_buf(struct psp_context *psp,
+ struct ta_mem_context *mem_ctx);
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx);
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context);
+int psp_ta_load(struct psp_context *psp, struct ta_context *context);
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context);
+
+int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta);
+int psp_xgmi_terminate(struct psp_context *psp);
+int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id);
+int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id);
+int psp_xgmi_get_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology,
+ bool get_extended_data);
+int psp_xgmi_set_topology_info(struct psp_context *psp,
+ int number_devices,
+ struct psp_xgmi_topology_info *topology);
+int psp_ras_initialize(struct psp_context *psp);
+int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_ras_enable_features(struct psp_context *psp,
+ union ta_ras_cmd_input *info, bool enable);
+int psp_ras_trigger_error(struct psp_context *psp,
+ struct ta_ras_trigger_error_input *info, uint32_t instance_mask);
+int psp_ras_terminate(struct psp_context *psp);
+int psp_ras_query_address(struct psp_context *psp,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out);
+
+int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+int psp_rap_invoke(struct psp_context *psp, uint32_t ta_cmd_id, enum ta_rap_status *status);
+int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
+
+int psp_rlc_autoload_start(struct psp_context *psp);
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+ uint32_t value);
+int psp_ring_cmd_submit(struct psp_context *psp,
+ uint64_t cmd_buf_mc_addr,
+ uint64_t fence_mc_addr,
+ int index);
+int psp_init_asd_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_toc_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_sos_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_ta_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_init_cap_microcode(struct psp_context *psp,
+ const char *chip_name);
+int psp_get_fw_attestation_records_addr(struct psp_context *psp,
+ uint64_t *output_ptr);
+int psp_update_fw_reservation(struct psp_context *psp);
+int psp_load_fw_list(struct psp_context *psp,
+ struct amdgpu_firmware_info **ucode_list, int ucode_count);
+void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
+
+int psp_spatial_partition(struct psp_context *psp, int mode);
+int psp_memory_partition(struct psp_context *psp, int mode);
+
+int is_psp_fw_valid(struct psp_bin_desc bin);
+
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+bool amdgpu_psp_get_ras_capability(struct psp_context *psp);
+
+int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev);
+int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev);
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
new file mode 100644
index 000000000000..6e8aad91bcd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_psp_ta.h"
+
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+
+static uint32_t get_bin_version(const uint8_t *bin)
+{
+ const struct common_firmware_header *hdr =
+ (const struct common_firmware_header *)bin;
+
+ return hdr->ucode_version;
+}
+
+static int prep_ta_mem_context(struct ta_mem_context *mem_context,
+ uint8_t *shared_buf,
+ uint32_t shared_buf_len)
+{
+ if (mem_context->shared_mem_size < shared_buf_len)
+ return -EINVAL;
+ memset(mem_context->shared_buf, 0, mem_context->shared_mem_size);
+ memcpy((void *)mem_context->shared_buf, shared_buf, shared_buf_len);
+
+ return 0;
+}
+
+static bool is_ta_type_valid(enum ta_type_id ta_type)
+{
+ switch (ta_type) {
+ case TA_TYPE_RAS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct ta_funcs ras_ta_funcs = {
+ .fn_ta_initialize = psp_ras_initialize,
+ .fn_ta_invoke = psp_ras_invoke,
+ .fn_ta_terminate = psp_ras_terminate
+};
+
+static void set_ta_context_funcs(struct psp_context *psp,
+ enum ta_type_id ta_type,
+ struct ta_context **pcontext)
+{
+ switch (ta_type) {
+ case TA_TYPE_RAS:
+ *pcontext = &psp->ras_context.context;
+ psp->ta_funcs = &ras_ta_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static const struct file_operations ta_load_debugfs_fops = {
+ .write = ta_if_load_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+static const struct file_operations ta_unload_debugfs_fops = {
+ .write = ta_if_unload_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+static const struct file_operations ta_invoke_debugfs_fops = {
+ .write = ta_if_invoke_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+/*
+ * DOC: AMDGPU TA debugfs interfaces
+ *
+ * Three debugfs interfaces can be opened by a program to
+ * load/invoke/unload a TA:
+ *
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_load
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_invoke
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_unload
+ *
+ * How to use the interfaces in a program?
+ *
+ * A program needs to provide a transmit buffer to the interfaces
+ * and will receive a buffer back from them, as described below:
+ *
+ * - For TA load debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA bin length (4bytes)
+ * - TA bin
+ * Receive buffer:
+ * - TA ID (4bytes)
+ *
+ * - For TA invoke debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA ID (4bytes)
+ * - TA CMD ID (4bytes)
+ * - TA shared buf length
+ * (4bytes, value not beyond TA shared memory size)
+ * - TA shared buf
+ * Receive buffer:
+ * - TA shared buf
+ *
+ * - For TA unload debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA ID (4bytes)
+ */
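+
+/*
+ * A minimal user-space sketch of the ta_load protocol described above
+ * (illustrative only; the dri instance number is an assumption and
+ * TA_TYPE_RAS is 2 per enum ta_type_id):
+ *
+ *   uint32_t ta_type = 2, ta_id;
+ *   uint8_t *xmit = malloc(8 + bin_len);
+ *   memcpy(xmit, &ta_type, 4);              // TA type
+ *   memcpy(xmit + 4, &bin_len, 4);          // TA bin length
+ *   memcpy(xmit + 8, ta_bin, bin_len);      // TA bin
+ *   int fd = open("/sys/kernel/debug/dri/0/ta_if/ta_load", O_WRONLY);
+ *   write(fd, xmit, 8 + bin_len);
+ *   memcpy(&ta_id, xmit, 4);                // TA ID is written back
+ */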
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_bin_len = 0;
+ uint8_t *ta_bin = NULL;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret || (!is_ta_type_valid(ta_type)))
+ return -EFAULT;
+
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+
+ if (ta_bin_len > PSP_1_MEG)
+ return -EINVAL;
+
+ copy_pos += sizeof(uint32_t);
+
+ ta_bin = memdup_user(&buf[copy_pos], ta_bin_len);
+ if (IS_ERR(ta_bin))
+ return PTR_ERR(ta_bin);
+
+ /* Set TA context and functions */
+ set_ta_context_funcs(psp, ta_type, &context);
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+ dev_err(adev->dev, "Unsupported function to terminate TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_bin;
+ }
+
+ /*
+ * Allocate the TA shared buf in case it was freed because a
+ * previous TA load failed.
+ */
+ if (!context->mem_context.shared_buf) {
+ ret = psp_ta_init_shared_buf(psp, &context->mem_context);
+ if (ret) {
+ ret = -ENOMEM;
+ goto err_free_bin;
+ }
+ }
+
+ ret = psp_fn_ta_terminate(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev,
+ "Failed to unload embedded TA (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ goto err_free_ta_shared_buf;
+ }
+
+ /* Prepare TA context for TA initialization */
+ context->ta_type = ta_type;
+ context->bin_desc.fw_version = get_bin_version(ta_bin);
+ context->bin_desc.size_bytes = ta_bin_len;
+ context->bin_desc.start_addr = ta_bin;
+
+ if (!psp->ta_funcs->fn_ta_initialize) {
+ dev_err(adev->dev, "Unsupported function to initialize TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_ta_shared_buf;
+ }
+
+ ret = psp_fn_ta_initialize(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to load TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ goto err_free_ta_shared_buf;
+ }
+
+ if (copy_to_user((char *)buf, (void *)&context->session_id, sizeof(uint32_t)))
+ ret = -EFAULT;
+
+err_free_ta_shared_buf:
+ /* Only free the TA shared buf when an error code is returned */
+ if (ret && context->mem_context.shared_buf)
+ psp_ta_free_shared_buf(&context->mem_context);
+err_free_bin:
+ kfree(ta_bin);
+
+ return ret;
+}
+
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_id = 0;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret || (!is_ta_type_valid(ta_type)))
+ return -EFAULT;
+
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+
+ set_ta_context_funcs(psp, ta_type, &context);
+ context->session_id = ta_id;
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+ dev_err(adev->dev, "Unsupported function to terminate TA\n");
+ return -EOPNOTSUPP;
+ }
+
+ ret = psp_fn_ta_terminate(psp);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to unload TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ }
+
+ if (context->mem_context.shared_buf)
+ psp_ta_free_shared_buf(&context->mem_context);
+
+ return ret;
+}
+
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_id = 0;
+ uint32_t cmd_id = 0;
+ uint32_t shared_buf_len = 0;
+ uint8_t *shared_buf = NULL;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context *context = NULL;
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EFAULT;
+ copy_pos += sizeof(uint32_t);
+
+ shared_buf = memdup_user(&buf[copy_pos], shared_buf_len);
+ if (IS_ERR(shared_buf))
+ return PTR_ERR(shared_buf);
+
+ set_ta_context_funcs(psp, ta_type, &context);
+
+ if (!context || !context->initialized) {
+ dev_err(adev->dev, "TA is not initialized\n");
+ ret = -EINVAL;
+ goto err_free_shared_buf;
+ }
+
+ if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) {
+ dev_err(adev->dev, "Unsupported function to invoke TA\n");
+ ret = -EOPNOTSUPP;
+ goto err_free_shared_buf;
+ }
+
+ context->session_id = ta_id;
+
+ mutex_lock(&psp->ras_context.mutex);
+ ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
+ if (ret)
+ goto err_free_shared_buf;
+
+ ret = psp_fn_ta_invoke(psp, cmd_id);
+ if (ret || context->resp_status) {
+ dev_err(adev->dev, "Failed to invoke TA via debugfs (%d) and status (0x%X)\n",
+ ret, context->resp_status);
+ if (!ret) {
+ ret = -EINVAL;
+ goto err_free_shared_buf;
+ }
+ }
+
+ if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len))
+ ret = -EFAULT;
+
+err_free_shared_buf:
+ mutex_unlock(&psp->ras_context.mutex);
+ kfree(shared_buf);
+
+ return ret;
+}
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ struct dentry *dir = debugfs_create_dir("ta_if", minor->debugfs_root);
+
+ debugfs_create_file("ta_load", 0200, dir, adev,
+ &ta_load_debugfs_fops);
+
+ debugfs_create_file("ta_unload", 0200, dir,
+ adev, &ta_unload_debugfs_fops);
+
+ debugfs_create_file("ta_invoke", 0200, dir,
+ adev, &ta_invoke_debugfs_fops);
+}
+
+#else
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
new file mode 100644
index 000000000000..14cd1c81c3e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PSP_TA_H__
+#define __AMDGPU_PSP_TA_H__
+
+/* Calling set_ta_context_funcs is required before using the following macros */
+#define psp_fn_ta_initialize(psp) ((psp)->ta_funcs->fn_ta_initialize((psp)))
+#define psp_fn_ta_invoke(psp, ta_cmd_id) ((psp)->ta_funcs->fn_ta_invoke((psp), (ta_cmd_id)))
+#define psp_fn_ta_terminate(psp) ((psp)->ta_funcs->fn_ta_terminate((psp)))
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
new file mode 100644
index 000000000000..bacf888735db
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_rap.h"
+
+/**
+ * DOC: AMDGPU RAP debugfs test interface
+ *
+ * How to use:
+ * echo opcode > <debugfs_dir>/dri/xxx/rap_test
+ *
+ * opcode:
+ * Currently, only opcode 2 is supported by the Linux host driver.
+ * It stands for TA_CMD_RAP__VALIDATE_L0 and is used to trigger L0
+ * policy validation. Refer to the header file ta_rap_if.h for more
+ * details.
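+ *
+ * Example (the dri instance number is illustrative):
+ *   echo 2 > /sys/kernel/debug/dri/0/rap_test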
+ *
+ */
+static ssize_t amdgpu_rap_debugfs_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct ta_rap_shared_memory *rap_shared_mem;
+ struct ta_rap_cmd_output_data *rap_cmd_output;
+ struct drm_device *dev = adev_to_drm(adev);
+ uint32_t op;
+ enum ta_rap_status status;
+ int ret;
+
+ if (*pos || size != 2)
+ return -EINVAL;
+
+ ret = kstrtouint_from_user(buf, size, *pos, &op);
+ if (ret)
+ return ret;
+
+ ret = pm_runtime_get_sync(dev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return ret;
+ }
+
+ /* Make sure the gfx core is on; the RAP TA can't handle
+ * the GFX OFF case currently.
+ */
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ switch (op) {
+ case 2:
+ ret = psp_rap_invoke(&adev->psp, op, &status);
+ if (!ret && status == TA_RAP_STATUS__SUCCESS) {
+ dev_info(adev->dev, "RAP L0 validate test success.\n");
+ } else {
+ rap_shared_mem = (struct ta_rap_shared_memory *)
+ adev->psp.rap_context.context.mem_context.shared_buf;
+ rap_cmd_output = &(rap_shared_mem->rap_out_message.output);
+
+ dev_info(adev->dev, "RAP test failed, the output is:\n");
+ dev_info(adev->dev, "\tlast_subsection: 0x%08x.\n",
+ rap_cmd_output->last_subsection);
+ dev_info(adev->dev, "\tnum_total_validate: 0x%08x.\n",
+ rap_cmd_output->num_total_validate);
+ dev_info(adev->dev, "\tnum_valid: 0x%08x.\n",
+ rap_cmd_output->num_valid);
+ dev_info(adev->dev, "\tlast_validate_addr: 0x%08x.\n",
+ rap_cmd_output->last_validate_addr);
+ dev_info(adev->dev, "\tlast_validate_val: 0x%08x.\n",
+ rap_cmd_output->last_validate_val);
+ dev_info(adev->dev, "\tlast_validate_val_exptd: 0x%08x.\n",
+ rap_cmd_output->last_validate_val_exptd);
+ }
+ break;
+ default:
+ dev_info(adev->dev, "Unsupported op id: %d, ", op);
+ dev_info(adev->dev, "Only support op 2(L0 validate test).\n");
+ break;
+ }
+
+ amdgpu_gfx_off_ctrl(adev, true);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return size;
+}
+
+static const struct file_operations amdgpu_rap_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_rap_debugfs_write,
+ .llseek = default_llseek
+};
+
+void amdgpu_rap_debugfs_init(struct amdgpu_device *adev)
+{
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ if (!adev->psp.rap_context.context.initialized)
+ return;
+
+ debugfs_create_file("rap_test", S_IWUSR, minor->debugfs_root,
+ adev, &amdgpu_rap_debugfs_ops);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.h
new file mode 100644
index 000000000000..ec6d7632d3a0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef _AMDGPU_RAP_H
+#define _AMDGPU_RAP_H
+
+#include "amdgpu.h"
+
+void amdgpu_rap_debugfs_init(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
new file mode 100644
index 000000000000..2a6cf7963dde
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -0,0 +1,5748 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/reboot.h>
+#include <linux/syscalls.h>
+#include <linux/pm_runtime.h>
+#include <linux/list_sort.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_xgmi.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include "nbio_v4_3.h"
+#include "nbif_v6_3_1.h"
+#include "nbio_v7_9.h"
+#include "atom.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ras_mgr.h"
+
+#ifdef CONFIG_X86_MCE_AMD
+#include <asm/mce.h>
+
+static bool notifier_registered;
+#endif
+static const char *RAS_FS_NAME = "ras";
+
+const char *ras_error_string[] = {
+ "none",
+ "parity",
+ "single_correctable",
+ "multi_uncorrectable",
+ "poison",
+};
+
+const char *ras_block_string[] = {
+ "umc",
+ "sdma",
+ "gfx",
+ "mmhub",
+ "athub",
+ "pcie_bif",
+ "hdp",
+ "xgmi_wafl",
+ "df",
+ "smn",
+ "sem",
+ "mp0",
+ "mp1",
+ "fuse",
+ "mca",
+ "vcn",
+ "jpeg",
+ "ih",
+ "mpio",
+ "mmsch",
+};
+
+const char *ras_mca_block_string[] = {
+ "mca_mp0",
+ "mca_mp1",
+ "mca_mpio",
+ "mca_iohc",
+};
+
+struct amdgpu_ras_block_list {
+ /* ras block link */
+ struct list_head node;
+
+ struct amdgpu_ras_block_object *ras_obj;
+};
+
+const char *get_ras_block_str(struct ras_common_if *ras_block)
+{
+ if (!ras_block)
+ return "NULL";
+
+ if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT ||
+ ras_block->block >= ARRAY_SIZE(ras_block_string))
+ return "OUT OF RANGE";
+
+ if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
+ return ras_mca_block_string[ras_block->sub_block_index];
+
+ return ras_block_string[ras_block->block];
+}
+
+#define ras_block_str(_BLOCK_) \
+ (((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] : "Out Of Range")
+
+#define ras_err_str(i) (ras_error_string[ffs(i)])
+
+#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
+
+/* inject address is 52 bits */
+#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52)
+
+/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
+#define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL)
+
+#define MAX_UMC_POISON_POLLING_TIME_ASYNC 10
+
+#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms
+
+#define MAX_FLUSH_RETIRE_DWORK_TIMES 100
+
+#define BYPASS_ALLOCATED_ADDRESS 0x0
+#define BYPASS_INITIALIZATION_ADDRESS 0x1
+
+enum amdgpu_ras_retire_page_reservation {
+ AMDGPU_RAS_RETIRE_PAGE_RESERVED,
+ AMDGPU_RAS_RETIRE_PAGE_PENDING,
+ AMDGPU_RAS_RETIRE_PAGE_FAULT,
+};
+
+atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
+
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+ uint64_t addr);
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t addr);
+
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev);
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev);
+
+#ifdef CONFIG_X86_MCE_AMD
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+static void
+amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev);
+struct mce_notifier_adev_list {
+ struct amdgpu_device *devs[MAX_GPU_INSTANCE];
+ int num_gpu;
+};
+static struct mce_notifier_adev_list mce_adev_list;
+#endif
+
+void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
+{
+ if (adev && amdgpu_ras_get_context(adev))
+ amdgpu_ras_get_context(adev)->error_query_ready = ready;
+}
+
+static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
+{
+ if (adev && amdgpu_ras_get_context(adev))
+ return amdgpu_ras_get_context(adev)->error_query_ready;
+
+ return false;
+}
+
+static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address)
+{
+ struct ras_err_data err_data;
+ struct eeprom_table_record err_rec;
+ int ret;
+
+ ret = amdgpu_ras_check_bad_page(adev, address);
+ if (ret == -EINVAL) {
+ dev_warn(adev->dev,
+ "RAS WARN: input address 0x%llx is invalid.\n",
+ address);
+ return -EINVAL;
+ } else if (ret == 1) {
+ dev_warn(adev->dev,
+ "RAS WARN: 0x%llx has already been marked as bad page!\n",
+ address);
+ return 0;
+ }
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
+ err_data.err_addr = &err_rec;
+ amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0);
+
+ if (amdgpu_bad_page_threshold != 0) {
+ amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+ err_data.err_addr_cnt, false);
+ amdgpu_ras_save_bad_pages(adev, NULL);
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+
+ dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n");
+ dev_warn(adev->dev, "Clear EEPROM:\n");
+ dev_warn(adev->dev, " echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
+
+ return 0;
+}
+
+static int amdgpu_check_address_validity(struct amdgpu_device *adev,
+ uint64_t address, uint64_t flags)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_block_info blk_info;
+ uint64_t page_pfns[32] = {0};
+ int i, ret, count;
+ bool hit = false;
+
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
+ return 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_check_vf_critical_region(adev, address, &hit))
+ return -EPERM;
+ return hit ? -EACCES : 0;
+ }
+
+ if ((address >= adev->gmc.mc_vram_size) ||
+ (address >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EFAULT;
+
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+ address, page_pfns, ARRAY_SIZE(page_pfns));
+ if (count <= 0)
+ return -EPERM;
+
+ for (i = 0; i < count; i++) {
+ memset(&blk_info, 0, sizeof(blk_info));
+ ret = amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr,
+ page_pfns[i] << AMDGPU_GPU_PAGE_SHIFT, &blk_info);
+ if (!ret) {
+ /* The input address that needs to be checked is allocated by
+ * the current calling process, so it is necessary to exclude
+ * the calling process.
+ */
+ if ((flags == BYPASS_ALLOCATED_ADDRESS) &&
+ ((blk_info.task.pid != task_pid_nr(current)) ||
+ strncmp(blk_info.task.comm, current->comm, TASK_COMM_LEN)))
+ return -EACCES;
+ else if ((flags == BYPASS_INITIALIZATION_ADDRESS) &&
+ (blk_info.task.pid == con->init_task_pid) &&
+ !strncmp(blk_info.task.comm, con->init_task_comm, TASK_COMM_LEN))
+ return -EACCES;
+ }
+ }
+
+ return 0;
+}
+
+static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct ras_manager *obj = (struct ras_manager *)file_inode(f)->i_private;
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+ ssize_t s;
+ char val[128];
+
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
+ return -EINVAL;
+
+ /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
+ if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+ }
+
+ s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
+ "ue", info.ue_count,
+ "ce", info.ce_count);
+ if (*pos >= s)
+ return 0;
+
+ s -= *pos;
+ s = min_t(u64, s, size);
+
+
+ if (copy_to_user(buf, &val[*pos], s))
+ return -EINVAL;
+
+ *pos += s;
+
+ return s;
+}
+
+static const struct file_operations amdgpu_ras_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ras_debugfs_read,
+ .write = NULL,
+ .llseek = default_llseek
+};
+
+static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
+ *block_id = i;
+ if (strcmp(name, ras_block_string[i]) == 0)
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
+ const char __user *buf, size_t size,
+ loff_t *pos, struct ras_debug_if *data)
+{
+ ssize_t s = min_t(u64, 64, size);
+ char str[65];
+ char block_name[33];
+ char err[9] = "ue";
+ int op = -1;
+ int block_id;
+ uint32_t sub_block;
+ u64 address, value;
+ /* default value is 0 if the mask is not set by user */
+ u32 instance_mask = 0;
+
+ if (*pos)
+ return -EINVAL;
+ *pos = size;
+
+ memset(str, 0, sizeof(str));
+ memset(data, 0, sizeof(*data));
+
+ if (copy_from_user(str, buf, s))
+ return -EINVAL;
+
+ if (sscanf(str, "disable %32s", block_name) == 1)
+ op = 0;
+ else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
+ op = 1;
+ else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
+ op = 2;
+ else if (strstr(str, "retire_page") != NULL)
+ op = 3;
+ else if (strstr(str, "check_address") != NULL)
+ op = 4;
+ else if (str[0] && str[1] && str[2] && str[3])
+ /* ascii string, but commands are not matched. */
+ return -EINVAL;
+
+ if (op != -1) {
+ if (op == 3) {
+ if (sscanf(str, "%*s 0x%llx", &address) != 1 &&
+ sscanf(str, "%*s %llu", &address) != 1)
+ return -EINVAL;
+
+ data->op = op;
+ data->inject.address = address;
+
+ return 0;
+ } else if (op == 4) {
+ if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) != 2 &&
+ sscanf(str, "%*s %llu %llu", &address, &value) != 2)
+ return -EINVAL;
+
+ data->op = op;
+ data->inject.address = address;
+ data->inject.value = value;
+ return 0;
+ }
+
+ if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
+ return -EINVAL;
+
+ data->head.block = block_id;
+ /* only ue, ce and poison errors are supported */
+ if (!memcmp("ue", err, 2))
+ data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ else if (!memcmp("ce", err, 2))
+ data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
+ else if (!memcmp("poison", err, 6))
+ data->head.type = AMDGPU_RAS_ERROR__POISON;
+ else
+ return -EINVAL;
+
+ data->op = op;
+
+ if (op == 2) {
+ if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu %u",
+ &sub_block, &address, &value, &instance_mask) != 4 &&
+ sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
+ &sub_block, &address, &value) != 3 &&
+ sscanf(str, "%*s %*s %*s %u %llu %llu",
+ &sub_block, &address, &value) != 3)
+ return -EINVAL;
+ data->head.sub_block_index = sub_block;
+ data->inject.address = address;
+ data->inject.value = value;
+ data->inject.instance_mask = instance_mask;
+ }
+ } else {
+ if (size < sizeof(*data))
+ return -EINVAL;
+
+ if (copy_from_user(data, buf, sizeof(*data)))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev,
+ struct ras_debug_if *data)
+{
+ int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
+ uint32_t mask, inst_mask = data->inject.instance_mask;
+
+ /* no need to set instance mask if there is only one instance */
+ if (num_xcc <= 1 && inst_mask) {
+ data->inject.instance_mask = 0;
+ dev_dbg(adev->dev,
+ "RAS inject mask(0x%x) isn't supported and force it to 0.\n",
+ inst_mask);
+
+ return;
+ }
+
+ switch (data->head.block) {
+ case AMDGPU_RAS_BLOCK__GFX:
+ mask = GENMASK(num_xcc - 1, 0);
+ break;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ mask = GENMASK(adev->sdma.num_instances - 1, 0);
+ break;
+ case AMDGPU_RAS_BLOCK__VCN:
+ case AMDGPU_RAS_BLOCK__JPEG:
+ mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0);
+ break;
+ default:
+ mask = inst_mask;
+ break;
+ }
+
+ /* remove invalid bits in instance mask */
+ data->inject.instance_mask &= mask;
+ if (inst_mask != data->inject.instance_mask)
+ dev_dbg(adev->dev,
+ "Adjust RAS inject mask 0x%x to 0x%x\n",
+ inst_mask, data->inject.instance_mask);
+}
+
+/**
+ * DOC: AMDGPU RAS debugfs control interface
+ *
+ * The control interface accepts struct ras_debug_if which has two members.
+ *
+ * First member: ras_debug_if::head or ras_debug_if::inject.
+ *
+ * head is used to indicate which IP block will be under control.
+ *
+ * head has four members: block, type, sub_block_index and name.
+ * block: which IP will be under control.
+ * type: what kind of error will be enabled/disabled/injected.
+ * sub_block_index: some IPs have subcomponents, e.g. GFX, SDMA.
+ * name: the name of the IP.
+ *
+ * inject has three more members than head: address, value and mask.
+ * As their names indicate, the inject operation will write the
+ * value to the address.
+ *
+ * The second member: struct ras_debug_if::op.
+ * It has three kinds of operations.
+ *
+ * - 0: disable RAS on the block. Take ::head as its data.
+ * - 1: enable RAS on the block. Take ::head as its data.
+ * - 2: inject errors on the block. Take ::inject as its data.
+ *
+ * How to use the interface?
+ *
+ * In a program
+ *
+ * Copy struct ras_debug_if into your code and initialize it,
+ * then write the struct to the control interface.
+ *
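+ * A minimal userspace sketch of that path (it assumes struct ras_debug_if
+ * and the RAS enums have been copied from the driver sources into a local
+ * header; the card index and error handling are illustrative only):
+ *
+ * .. code-block:: c
+ *
+ * #include <fcntl.h>
+ * #include <stdio.h>
+ * #include <unistd.h>
+ * #include "ras_debug_if.h" // assumed local copy of the struct and enums
+ *
+ * int main(void)
+ * {
+ * struct ras_debug_if data = { 0 };
+ * int fd;
+ *
+ * data.op = 1; // 1 == enable RAS on the block
+ * data.head.block = AMDGPU_RAS_BLOCK__UMC;
+ * data.head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ *
+ * fd = open("/sys/kernel/debug/dri/0/ras/ras_ctrl", O_WRONLY);
+ * if (fd < 0)
+ * return 1;
+ * if (write(fd, &data, sizeof(data)) != sizeof(data))
+ * perror("ras_ctrl write");
+ * close(fd);
+ * return 0;
+ * }
+ *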
+ * From shell
+ *
+ * .. code-block:: bash
+ *
+ * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
+ *
+ * Where N is the card you want to affect.
+ *
+ * "disable" requires only the block.
+ * "enable" requires the block and error type.
+ * "inject" requires the block, error type, address, and value.
+ *
+ * The block is one of: umc, sdma, gfx, etc.;
+ * see ras_block_string[] for the full list.
+ *
+ * The error type is one of: ue, ce and poison, where
+ * ue is multi-uncorrectable,
+ * ce is single-correctable, and
+ * poison is poison.
+ *
+ * The sub-block is the sub-block index; pass 0 if there is no sub-block.
+ * The address and value are hexadecimal numbers; the leading 0x is optional.
+ * The mask is the instance mask; it is optional and defaults to 0x1.
+ *
+ * For instance,
+ *
+ * .. code-block:: bash
+ *
+ * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
+ *
+ * How to check the result of the operation?
+ *
+ * To check disable/enable, see "ras" features at,
+ * /sys/class/drm/card[0/1/2...]/device/ras/features
+ *
+ * To check inject, see the corresponding error count at,
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count
+ *
+ * .. note::
+ * Operations are only allowed on blocks which are supported.
+ * Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask
+ * to see which blocks support RAS on a particular asic.
+ *
+ */
+static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
+ const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct ras_debug_if data;
+ int ret = 0;
+
+ if (!amdgpu_ras_get_error_query_ready(adev)) {
+ dev_warn(adev->dev, "RAS WARN: error injection "
+ "currently inaccessible\n");
+ return size;
+ }
+
+ ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
+ if (ret)
+ return ret;
+
+ if (data.op == 3) {
+ ret = amdgpu_reserve_page_direct(adev, data.inject.address);
+ if (!ret)
+ return size;
+ else
+ return ret;
+ } else if (data.op == 4) {
+ ret = amdgpu_check_address_validity(adev, data.inject.address, data.inject.value);
+ return ret ? ret : size;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, data.head.block))
+ return -EINVAL;
+
+ switch (data.op) {
+ case 0:
+ ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
+ break;
+ case 1:
+ ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
+ break;
+ case 2:
+ /* umc ce/ue error injection for a bad page is not allowed */
+ if (data.head.block == AMDGPU_RAS_BLOCK__UMC)
+ ret = amdgpu_ras_check_bad_page(adev, data.inject.address);
+ if (ret == -EINVAL) {
+ dev_warn(adev->dev, "RAS WARN: input address 0x%llx is invalid.",
+ data.inject.address);
+ break;
+ } else if (ret == 1) {
+ dev_warn(adev->dev, "RAS WARN: inject: 0x%llx has already been marked as bad!\n",
+ data.inject.address);
+ break;
+ }
+
+ amdgpu_ras_instance_mask_check(adev, &data);
+
+ /* data.inject.address is an offset instead of an absolute gpu address */
+ ret = amdgpu_ras_error_inject(adev, &data.inject);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ return ret;
+
+ return size;
+}
+
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev);
+
+/**
+ * DOC: AMDGPU RAS debugfs EEPROM table reset interface
+ *
+ * Some boards contain an EEPROM which is used to persistently store a list of
+ * bad pages which experienced ECC errors in vram. This interface provides
+ * a way to reset the EEPROM, e.g., after testing error injection.
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo 1 > ../ras/ras_eeprom_reset
+ *
+ * will reset the EEPROM table to 0 entries.
+ *
+ */
+static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f,
+ const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev =
+ (struct amdgpu_device *)file_inode(f)->i_private;
+ int ret;
+
+ if (amdgpu_uniras_enabled(adev)) {
+ ret = amdgpu_uniras_clear_badpages_info(adev);
+ return ret ? ret : size;
+ }
+
+ ret = amdgpu_ras_eeprom_reset_table(
+ &(amdgpu_ras_get_context(adev)->eeprom_control));
+
+ if (!ret) {
+ /* Something was written to EEPROM.
+ */
+ amdgpu_ras_get_context(adev)->flags = RAS_DEFAULT_FLAGS;
+ return size;
+ } else {
+ return ret;
+ }
+}
+
+static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_ras_debugfs_ctrl_write,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_ras_debugfs_eeprom_write,
+ .llseek = default_llseek
+};
+
+/**
+ * DOC: AMDGPU RAS sysfs Error Count Interface
+ *
+ * It allows the user to read the error count for each IP block on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * It outputs multiple lines which report the uncorrected (ue) and corrected
+ * (ce) error counts.
+ *
+ * The format of one line is below,
+ *
+ * [ce|ue]: count
+ *
+ * Example:
+ *
+ * .. code-block:: bash
+ *
+ * ue: 0
+ * ce: 1
+ *
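+ * A short userspace C sketch that reads and parses one of these files (the
+ * card index and the umc block name are placeholders):
+ *
+ * .. code-block:: c
+ *
+ * #include <stdio.h>
+ *
+ * int main(void)
+ * {
+ * unsigned long ue = 0, ce = 0;
+ * FILE *f = fopen("/sys/class/drm/card0/device/ras/umc_err_count", "r");
+ *
+ * if (!f)
+ * return 1;
+ * // the file layout is "ue: <count>" on one line, "ce: <count>" on the next
+ * if (fscanf(f, "ue: %lu ce: %lu", &ue, &ce) == 2)
+ * printf("ue=%lu ce=%lu\n", ue, ce);
+ * fclose(f);
+ * return 0;
+ * }
+ *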
+ */
+static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ras_manager *obj = container_of(attr, struct ras_manager, sysfs_attr);
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ if (!amdgpu_ras_get_error_query_ready(obj->adev))
+ return sysfs_emit(buf, "Query currently inaccessible\n");
+
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
+ return -EINVAL;
+
+ if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+ }
+
+ if (info.head.block == AMDGPU_RAS_BLOCK__UMC)
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count, "de", info.de_count);
+ else
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count);
+}
+
+/* obj begin */
+
+#define get_obj(obj) do { (obj)->use++; } while (0)
+#define alive_obj(obj) ((obj)->use)
+
+static inline void put_obj(struct ras_manager *obj)
+{
+ if (obj && (--obj->use == 0)) {
+ list_del(&obj->node);
+ amdgpu_ras_error_data_fini(&obj->err_data);
+ }
+
+ if (obj && (obj->use < 0))
+ DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
+}
+
+/* make one obj and return it. */
+static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+
+ if (!adev->ras_enabled || !con)
+ return NULL;
+
+ if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
+ return NULL;
+
+ if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+ if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+ return NULL;
+
+ obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+ } else
+ obj = &con->objs[head->block];
+
+ /* already exists. return obj? */
+ if (alive_obj(obj))
+ return NULL;
+
+ if (amdgpu_ras_error_data_init(&obj->err_data))
+ return NULL;
+
+ obj->head = *head;
+ obj->adev = adev;
+ list_add(&obj->node, &con->head);
+ get_obj(obj);
+
+ return obj;
+}
+
+/* return an obj equal to head, or the first when head is NULL */
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+ int i;
+
+ if (!adev->ras_enabled || !con)
+ return NULL;
+
+ if (head) {
+ if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
+ return NULL;
+
+ if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+ if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+ return NULL;
+
+ obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+ } else
+ obj = &con->objs[head->block];
+
+ if (alive_obj(obj))
+ return obj;
+ } else {
+ for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT + AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
+ obj = &con->objs[i];
+ if (alive_obj(obj))
+ return obj;
+ }
+ }
+
+ return NULL;
+}
+/* obj end */
+
+/* feature ctl begin */
+static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ return adev->ras_hw_enabled & BIT(head->block);
+}
+
+static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ return con->features & BIT(head->block);
+}
+
+/*
+ * if obj is not created, then create one.
+ * set feature enable flag.
+ */
+static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
+ struct ras_common_if *head, int enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+ /* If hardware does not support ras, then do not create obj.
+ * But if hardware supports ras, we can create the obj.
+ * The ras framework checks con->hw_supported to see if it needs to do
+ * the corresponding initialization.
+ * The IP checks con->support to see if it needs to disable ras.
+ */
+ if (!amdgpu_ras_is_feature_allowed(adev, head))
+ return 0;
+
+ if (enable) {
+ if (!obj) {
+ obj = amdgpu_ras_create_obj(adev, head);
+ if (!obj)
+ return -EINVAL;
+ } else {
+ /* In case we create obj somewhere else */
+ get_obj(obj);
+ }
+ con->features |= BIT(head->block);
+ } else {
+ if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
+ con->features &= ~BIT(head->block);
+ put_obj(obj);
+ }
+ }
+
+ return 0;
+}
+
+/* wrapper of psp_ras_enable_features */
+int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
+ struct ras_common_if *head, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ union ta_ras_cmd_input *info;
+ int ret;
+
+ if (!con)
+ return -EINVAL;
+
+ /* For non-gfx ip, do not enable the ras feature if it is not allowed.
+ * For gfx ip, force-issue enable or disable ras feature commands
+ * regardless of feature support status.
+ */
+ if (head->block != AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_ras_is_feature_allowed(adev, head))
+ return 0;
+
+ /* Only enable gfx ras feature from host side */
+ if (head->block == AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_sriov_vf(adev) &&
+ !amdgpu_ras_intr_triggered()) {
+ info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ if (!enable) {
+ info->disable_features = (struct ta_ras_disable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ } else {
+ info->enable_features = (struct ta_ras_enable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ }
+
+ ret = psp_ras_enable_features(&adev->psp, info, enable);
+ if (ret) {
+ dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
+ enable ? "enable":"disable",
+ get_ras_block_str(head),
+ amdgpu_ras_is_poison_mode_supported(adev), ret);
+ kfree(info);
+ return ret;
+ }
+
+ kfree(info);
+ }
+
+ /* setup the obj */
+ __amdgpu_ras_feature_enable(adev, head, enable);
+
+ return 0;
+}
+
+/* Only used in device probe stage and called only once. */
+int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
+ struct ras_common_if *head, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret;
+
+ if (!con)
+ return -EINVAL;
+
+ if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
+ if (enable) {
+ /* There is no harm in issuing a ras TA cmd regardless of
+ * the current ras state.
+ * If current state == target state, it will do nothing.
+ * But sometimes it requests the driver to reset and repost
+ * with error code -EAGAIN.
+ */
+ ret = amdgpu_ras_feature_enable(adev, head, 1);
+ /* With old ras TA, we might fail to enable ras.
+ * Log it and just set up the object.
+ * TODO: remove this WA in the future.
+ */
+ if (ret == -EINVAL) {
+ ret = __amdgpu_ras_feature_enable(adev, head, 1);
+ if (!ret)
+ dev_info(adev->dev,
+ "RAS INFO: %s setup object\n",
+ get_ras_block_str(head));
+ }
+ } else {
+ /* Set up the object, then issue a ras TA disable cmd. */
+ ret = __amdgpu_ras_feature_enable(adev, head, 1);
+ if (ret)
+ return ret;
+
+ /* gfx block ras disable cmd must send to ras-ta */
+ if (head->block == AMDGPU_RAS_BLOCK__GFX)
+ con->features |= BIT(head->block);
+
+ ret = amdgpu_ras_feature_enable(adev, head, 0);
+
+ /* clean gfx block ras features flag */
+ if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
+ con->features &= ~BIT(head->block);
+ }
+ } else
+ ret = amdgpu_ras_feature_enable(adev, head, enable);
+
+ return ret;
+}
+
+static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev,
+ bool bypass)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj, *tmp;
+
+ list_for_each_entry_safe(obj, tmp, &con->head, node) {
+ /* bypass psp.
+ * aka just release the obj and corresponding flags
+ */
+ if (bypass) {
+ if (__amdgpu_ras_feature_enable(adev, &obj->head, 0))
+ break;
+ } else {
+ if (amdgpu_ras_feature_enable(adev, &obj->head, 0))
+ break;
+ }
+ }
+
+ return con->features;
+}
+
+static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
+ bool bypass)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int i;
+ const enum amdgpu_ras_error_type default_ras_type = AMDGPU_RAS_ERROR__NONE;
+
+ for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
+ struct ras_common_if head = {
+ .block = i,
+ .type = default_ras_type,
+ .sub_block_index = 0,
+ };
+
+ if (i == AMDGPU_RAS_BLOCK__MCA)
+ continue;
+
+ if (bypass) {
+ /*
+ * bypass psp; the vbios enables ras for us,
+ * so just create the obj
+ */
+ if (__amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ } else {
+ if (amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ }
+ }
+
+ for (i = 0; i < AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__MCA,
+ .type = default_ras_type,
+ .sub_block_index = i,
+ };
+
+ if (bypass) {
+ /*
+ * bypass psp; the vbios enables ras for us,
+ * so just create the obj
+ */
+ if (__amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ } else {
+ if (amdgpu_ras_feature_enable(adev, &head, 1))
+ break;
+ }
+ }
+
+ return con->features;
+}
+/* feature ctl end */
+
+static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block)
+{
+ if (!block_obj)
+ return -EINVAL;
+
+ if (block_obj->ras_comm.block == block)
+ return 0;
+
+ return -EINVAL;
+}
+
+static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t sub_block_index)
+{
+ struct amdgpu_ras_block_list *node, *tmp;
+ struct amdgpu_ras_block_object *obj;
+
+ if (block >= AMDGPU_RAS_BLOCK__LAST)
+ return NULL;
+
+ list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+ if (!node->ras_obj) {
+ dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+ continue;
+ }
+
+ obj = node->ras_obj;
+ if (obj->ras_block_match) {
+ if (obj->ras_block_match(obj, block, sub_block_index) == 0)
+ return obj;
+ } else {
+ if (amdgpu_ras_block_match_default(obj, block) == 0)
+ return obj;
+ }
+ }
+
+ return NULL;
+}
+
+static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ /*
+ * choose the right query method according to
+ * whether the smu supports querying error information
+ */
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
+ if (ret == -EOPNOTSUPP) {
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
+ } else if (!ret) {
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
+
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address)
+ adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
+ }
+}
+
+static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
+ struct ras_manager *ras_mgr,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
+ const char *blk_name,
+ bool is_ue,
+ bool is_de)
+{
+ struct amdgpu_smuio_mcm_config_info *mcm_info;
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+ u64 event_id = qctx->evid.event_id;
+
+ if (is_ue) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ue_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new uncorrectable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ue_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld uncorrectable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
+ }
+
+ } else {
+ if (is_de) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->de_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new deferred hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->de_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld deferred hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->de_count, blk_name);
+ }
+ } else {
+ if (adev->debug_disable_ce_logs)
+ return;
+
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ if (err_info->ce_count) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new correctable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ce_count,
+ blk_name);
+ }
+ }
+
+ for_each_ras_error(err_node, &ras_mgr->err_data) {
+ err_info = &err_node->err_info;
+ mcm_info = &err_info->mcm_info;
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld correctable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->ce_count, blk_name);
+ }
+ }
+ }
+}
+
+static inline bool err_data_has_source_info(struct ras_err_data *data)
+{
+ return !list_empty(&data->err_node_list);
+}
+
+static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
+ struct ras_query_if *query_if,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head);
+ const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->evid.event_id;
+
+ if (err_data->ce_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, false, false);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld correctable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ce_count,
+ blk_name);
+ } else {
+ RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ce_count,
+ blk_name);
+ }
+ }
+
+ if (err_data->ue_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, true, false);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ue_count,
+ blk_name);
+ } else {
+ RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ue_count,
+ blk_name);
+ }
+ }
+
+ if (err_data->de_count) {
+ if (err_data_has_source_info(err_data)) {
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
+ blk_name, false, true);
+ } else if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld deferred hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.de_count,
+ blk_name);
+ } else {
+ RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.de_count,
+ blk_name);
+ }
+ }
+}
+
+static void amdgpu_ras_virt_error_generate_report(struct amdgpu_device *adev,
+ struct ras_query_if *query_if,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ unsigned long new_ue, new_ce, new_de;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &query_if->head);
+ const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->evid.event_id;
+
+ new_ce = err_data->ce_count - obj->err_data.ce_count;
+ new_ue = err_data->ue_count - obj->err_data.ue_count;
+ new_de = err_data->de_count - obj->err_data.de_count;
+
+ if (new_ce) {
+ RAS_EVENT_LOG(adev, event_id, "%lu correctable hardware errors "
+ "detected in %s block\n",
+ new_ce,
+ blk_name);
+ }
+
+ if (new_ue) {
+ RAS_EVENT_LOG(adev, event_id, "%lu uncorrectable hardware errors "
+ "detected in %s block\n",
+ new_ue,
+ blk_name);
+ }
+
+ if (new_de) {
+ RAS_EVENT_LOG(adev, event_id, "%lu deferred hardware errors "
+ "detected in %s block\n",
+ new_de,
+ blk_name);
+ }
+}
+
+static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
+{
+ struct ras_err_node *err_node;
+ struct ras_err_info *err_info;
+
+ if (err_data_has_source_info(err_data)) {
+ for_each_ras_error(err_node, err_data) {
+ err_info = &err_node->err_info;
+ amdgpu_ras_error_statistic_de_count(&obj->err_data,
+ &err_info->mcm_info, err_info->de_count);
+ amdgpu_ras_error_statistic_ce_count(&obj->err_data,
+ &err_info->mcm_info, err_info->ce_count);
+ amdgpu_ras_error_statistic_ue_count(&obj->err_data,
+ &err_info->mcm_info, err_info->ue_count);
+ }
+ } else {
+ /* for legacy asic path which doesn't have error source info */
+ obj->err_data.ue_count += err_data->ue_count;
+ obj->err_data.ce_count += err_data->ce_count;
+ obj->err_data.de_count += err_data->de_count;
+ }
+}
+
+static void amdgpu_ras_mgr_virt_error_data_statistics_update(struct ras_manager *obj,
+ struct ras_err_data *err_data)
+{
+ /* Host reports absolute counts */
+ obj->err_data.ue_count = err_data->ue_count;
+ obj->err_data.ce_count = err_data->ce_count;
+ obj->err_data.de_count = err_data->de_count;
+}
+
+static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
+{
+ struct ras_common_if head;
+
+ memset(&head, 0, sizeof(head));
+ head.block = blk;
+
+ return amdgpu_ras_find_obj(adev, &head);
+}
+
+int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ const struct aca_info *aca_info, void *data)
+{
+ struct ras_manager *obj;
+
+ /* in resume phase, no need to create aca fs node */
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
+ return 0;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ return amdgpu_aca_add_handle(adev, &obj->aca_handle, ras_block_str(blk), aca_info, data);
+}
+
+int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
+{
+ struct ras_manager *obj;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ amdgpu_aca_remove_handle(&obj->aca_handle);
+
+ return 0;
+}
+
+static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ struct ras_manager *obj;
+
+ obj = get_ras_manager(adev, blk);
+ if (!obj)
+ return -EINVAL;
+
+ return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx);
+}
+
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+ struct aca_handle *handle, char *buf, void *data)
+{
+ struct ras_manager *obj = container_of(handle, struct ras_manager, aca_handle);
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ if (!amdgpu_ras_get_error_query_ready(obj->adev))
+ return sysfs_emit(buf, "Query currently inaccessible\n");
+
+ if (amdgpu_ras_query_error_status(obj->adev, &info))
+ return -EINVAL;
+
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count, "de", info.de_count);
+}
+
+static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
+ struct ras_query_if *info,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
+ unsigned int error_query_mode)
+{
+ enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
+ struct amdgpu_ras_block_object *block_obj = NULL;
+ int ret;
+
+ if (blk == AMDGPU_RAS_BLOCK_COUNT)
+ return -EINVAL;
+
+ if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
+ return -EINVAL;
+
+ if (error_query_mode == AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ return amdgpu_virt_req_ras_err_count(adev, blk, err_data);
+ } else if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
+ amdgpu_ras_get_ecc_info(adev, err_data);
+ } else {
+ block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
+ if (block_obj->hw_ops->query_ras_error_count)
+ block_obj->hw_ops->query_ras_error_count(adev, err_data);
+
+ if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
+ (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+ }
+ }
+ } else {
+ if (amdgpu_aca_is_enabled(adev)) {
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx);
+ if (ret)
+ return ret;
+ } else {
+ /* FIXME: add code to check return value later */
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx);
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx);
+ }
+ }
+
+ return 0;
+}
+
+/* query/inject/cure begin */
+static int amdgpu_ras_query_error_status_with_event(struct amdgpu_device *adev,
+ struct ras_query_if *info,
+ enum ras_event_type type)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ras_err_data err_data;
+ struct ras_query_context qctx;
+ unsigned int error_query_mode;
+ int ret;
+
+ if (!obj)
+ return -EINVAL;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode))
+ return -EINVAL;
+
+ memset(&qctx, 0, sizeof(qctx));
+ qctx.evid.type = type;
+ qctx.evid.event_id = amdgpu_ras_acquire_event_id(adev, type);
+
+ if (!down_read_trylock(&adev->reset_domain->sem)) {
+ ret = -EIO;
+ goto out_fini_err_data;
+ }
+
+ ret = amdgpu_ras_query_error_status_helper(adev, info,
+ &err_data,
+ &qctx,
+ error_query_mode);
+ up_read(&adev->reset_domain->sem);
+ if (ret)
+ goto out_fini_err_data;
+
+ if (error_query_mode != AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
+ amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
+ } else {
+ /* Host provides absolute error counts. First generate the report
+ * using the previous VF internal count against the new host count,
+ * then update the VF internal count.
+ */
+ amdgpu_ras_virt_error_generate_report(adev, info, &err_data, &qctx);
+ amdgpu_ras_mgr_virt_error_data_statistics_update(obj, &err_data);
+ }
+
+ info->ue_count = obj->err_data.ue_count;
+ info->ce_count = obj->err_data.ce_count;
+ info->de_count = obj->err_data.de_count;
+
+out_fini_err_data:
+ amdgpu_ras_error_data_fini(&err_data);
+
+ return ret;
+}
+
+static int amdgpu_uniras_clear_badpages_info(struct amdgpu_device *adev)
+{
+ struct ras_cmd_dev_handle req = {0};
+ int ret;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__CLEAR_BAD_PAGE_INFO,
+ &req, sizeof(req), NULL, 0);
+ if (ret) {
+ dev_err(adev->dev, "Failed to clear bad pages info, ret: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int amdgpu_uniras_query_block_ecc(struct amdgpu_device *adev,
+ struct ras_query_if *info)
+{
+ struct ras_cmd_block_ecc_info_req req = {0};
+ struct ras_cmd_block_ecc_info_rsp rsp = {0};
+ int ret;
+
+ if (!info)
+ return -EINVAL;
+
+ req.block_id = info->head.block;
+ req.subblock_id = info->head.sub_block_index;
+
+ ret = amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BLOCK_ECC_STATUS,
+ &req, sizeof(req), &rsp, sizeof(rsp));
+ if (!ret) {
+ info->ce_count = rsp.ce_count;
+ info->ue_count = rsp.ue_count;
+ info->de_count = rsp.de_count;
+ }
+
+ return ret;
+}
+
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info)
+{
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_query_block_ecc(adev, info);
+ else
+ return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID);
+}
+
+int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ ras_block_str(block));
+ return -EOPNOTSUPP;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, block) ||
+ !amdgpu_ras_get_aca_debug_mode(adev))
+ return -EOPNOTSUPP;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EOPNOTSUPP;
+
+ /* skip ras error reset in gpu reset */
+ if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) &&
+ ((smu_funcs && smu_funcs->set_debug_mode) ||
+ (mca_funcs && mca_funcs->mca_set_debug_mode)))
+ return -EOPNOTSUPP;
+
+ if (block_obj->hw_ops->reset_ras_error_count)
+ block_obj->hw_ops->reset_ras_error_count(adev);
+
+ return 0;
+}
+
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+
+ if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP)
+ return 0;
+
+ if ((block == AMDGPU_RAS_BLOCK__GFX) ||
+ (block == AMDGPU_RAS_BLOCK__MMHUB)) {
+ if (block_obj->hw_ops->reset_ras_error_status)
+ block_obj->hw_ops->reset_ras_error_status(adev);
+ }
+
+ return 0;
+}
+
+static int amdgpu_uniras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info)
+{
+ struct ras_cmd_inject_error_req inject_req;
+ struct ras_cmd_inject_error_rsp rsp;
+
+ if (!info)
+ return -EINVAL;
+
+ memset(&inject_req, 0, sizeof(inject_req));
+ inject_req.block_id = info->head.block;
+ inject_req.subblock_id = info->head.sub_block_index;
+ inject_req.address = info->address;
+ inject_req.error_type = info->head.type;
+ inject_req.instance_mask = info->instance_mask;
+ inject_req.method = info->value;
+
+ return amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__INJECT_ERROR,
+ &inject_req, sizeof(inject_req), &rsp, sizeof(rsp));
+}
+
+/* wrapper of psp_ras_trigger_error */
+int amdgpu_ras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
+ struct ta_ras_trigger_error_input block_info = {
+ .block_id = amdgpu_ras_block_to_ta(info->head.block),
+ .inject_error_type = amdgpu_ras_error_to_ta(info->head.type),
+ .sub_block_index = info->head.sub_block_index,
+ .address = info->address,
+ .value = info->value,
+ };
+ int ret = -EINVAL;
+ struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev,
+ info->head.block,
+ info->head.sub_block_index);
+
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_uniras_error_inject(adev, info);
+
+ /* inject on guest isn't allowed, return success directly */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (!obj)
+ return -EINVAL;
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
+ /* Calculate XGMI relative offset */
+ if (adev->gmc.xgmi.num_physical_nodes > 1 &&
+ info->head.block != AMDGPU_RAS_BLOCK__GFX) {
+ block_info.address =
+ amdgpu_xgmi_get_relative_phy_addr(adev,
+ block_info.address);
+ }
+
+ if (block_obj->hw_ops->ras_error_inject) {
+ if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
+ ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask);
+ else /* Special ras_error_inject is defined (e.g: xgmi) */
+ ret = block_obj->hw_ops->ras_error_inject(adev, &block_info,
+ info->instance_mask);
+ } else {
+ /* default path */
+ ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask);
+ }
+
+ if (ret)
+ dev_err(adev->dev, "ras inject %s failed %d\n",
+ get_ras_block_str(&info->head), ret);
+
+ return ret;
+}
+
+/**
+ * amdgpu_ras_query_error_count_helper -- Get error counter for specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctable errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectable errors.
+ * @query_info: pointer to ras_query_if
+ *
+ * Return 0 on query success or if there is nothing to do, otherwise
+ * return an error on failure.
+ */
+static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info)
+{
+ int ret;
+
+ if (!query_info)
+ /* do nothing if query_info is not specified */
+ return 0;
+
+ ret = amdgpu_ras_query_error_status(adev, query_info);
+ if (ret)
+ return ret;
+
+ *ce_count += query_info->ce_count;
+ *ue_count += query_info->ue_count;
+
+ /* some hardware/IPs support read-to-clear, so there is
+ * no need to explicitly reset the err status after the query call */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(adev, query_info->head.block))
+ dev_warn(adev->dev,
+ "Failed to reset error counter and error status\n");
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ras_query_error_count -- Get error counts of all IPs or specific IP
+ * @adev: pointer to AMD GPU device
+ * @ce_count: pointer to an integer to be set to the count of correctable errors.
+ * @ue_count: pointer to an integer to be set to the count of uncorrectable
+ * errors.
+ * @query_info: pointer to ras_query_if if the query request is only for a
+ * specific ip block; if info is NULL, then the query request is for
+ * all the ip blocks that support querying ras error counters/status
+ *
+ * If @ce_count or @ue_count is set, count and return the corresponding
+ * error counts in those integer pointers. Return 0 if the device
+ * supports RAS. Return -EOPNOTSUPP if the device doesn't support RAS.
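+ *
+ * A hedged usage sketch (the wrapper name below is hypothetical and not part
+ * of the driver):
+ *
+ * .. code-block:: c
+ *
+ * static void example_log_total_errors(struct amdgpu_device *adev)
+ * {
+ * unsigned long ce = 0, ue = 0;
+ *
+ * // a NULL query_info sums over all ip blocks that support the query
+ * if (!amdgpu_ras_query_error_count(adev, &ce, &ue, NULL))
+ * dev_info(adev->dev, "RAS totals: ce=%lu ue=%lu\n", ce, ue);
+ * }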
+ */
+int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+ unsigned long ce, ue;
+ int ret;
+
+ if (!adev->ras_enabled || !con)
+ return -EOPNOTSUPP;
+
+ /* Don't count since no reporting.
+ */
+ if (!ce_count && !ue_count)
+ return 0;
+
+ ce = 0;
+ ue = 0;
+ if (!query_info) {
+ /* query all the ip blocks that support ras query interface */
+ list_for_each_entry(obj, &con->head, node) {
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, &info);
+ }
+ } else {
+ /* query specific ip block */
+ ret = amdgpu_ras_query_error_count_helper(adev, &ce, &ue, query_info);
+ }
+
+ if (ret)
+ return ret;
+
+ if (ce_count)
+ *ce_count = ce;
+
+ if (ue_count)
+ *ue_count = ue;
+
+ return 0;
+}
+/* query/inject/cure end */
+
+
+/* sysfs begin */
+
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start);
+
+static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
+{
+ switch (flags) {
+ case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
+ return "R";
+ case AMDGPU_RAS_RETIRE_PAGE_PENDING:
+ return "P";
+ case AMDGPU_RAS_RETIRE_PAGE_FAULT:
+ default:
+ return "F";
+ }
+}
+
+/**
+ * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
+ *
+ * It allows the user to read the bad pages of vram on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
+ *
+ * It outputs multiple lines, and each line stands for one gpu page.
+ *
+ * The format of one line is below,
+ * gpu pfn : gpu page size : flags
+ *
+ * gpu pfn and gpu page size are printed in hex format.
+ * flags can be one of the characters below:
+ *
+ * R: reserved, this gpu page is reserved and not usable.
+ *
+ * P: pending for reserve, this gpu page is marked as bad and will be reserved
+ * in the next window of page_reserve.
+ *
+ * F: unable to reserve, this gpu page can't be reserved for some reason.
+ *
+ * Examples:
+ *
+ * .. code-block:: bash
+ *
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ *
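+ * A small userspace C sketch that walks this file and decodes each line (the
+ * card index is a placeholder; the line format is the one documented above):
+ *
+ * .. code-block:: c
+ *
+ * #include <stdio.h>
+ *
+ * int main(void)
+ * {
+ * char line[64], flag;
+ * unsigned int pfn, size;
+ * FILE *f = fopen("/sys/class/drm/card0/device/ras/gpu_vram_bad_pages", "r");
+ *
+ * if (!f)
+ * return 1;
+ * while (fgets(line, sizeof(line), f)) {
+ * // each line is "gpu pfn : gpu page size : flag", pfn and size in hex
+ * if (sscanf(line, "0x%x : 0x%x : %c", &pfn, &size, &flag) == 3)
+ * printf("pfn=0x%x size=0x%x flag=%c\n", pfn, size, flag);
+ * }
+ * fclose(f);
+ * return 0;
+ * }
+ *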
+ */
+
+static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
+ struct kobject *kobj, const struct bin_attribute *attr,
+ char *buf, loff_t ppos, size_t count)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, badpages_attr);
+ struct amdgpu_device *adev = con->adev;
+ const unsigned int element_size =
+ sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
+ unsigned int start = div64_ul(ppos + element_size - 1, element_size);
+ unsigned int end = div64_ul(ppos + count - 1, element_size);
+ ssize_t s = 0;
+ struct ras_badpage *bps = NULL;
+ int bps_count = 0, i, status;
+ uint64_t address;
+
+ memset(buf, 0, count);
+
+ bps_count = end - start;
+ bps = kmalloc_array(bps_count, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
+ return 0;
+
+ memset(bps, 0, sizeof(*bps) * bps_count);
+
+ if (amdgpu_uniras_enabled(adev))
+ bps_count = amdgpu_uniras_badpages_read(adev, bps, bps_count, start);
+ else
+ bps_count = amdgpu_ras_badpages_read(adev, bps, bps_count, start);
+
+ if (bps_count <= 0) {
+ kfree(bps);
+ return 0;
+ }
+
+ for (i = 0; i < bps_count; i++) {
+ address = ((uint64_t)bps[i].bp) << AMDGPU_GPU_PAGE_SHIFT;
+ if (amdgpu_ras_check_critical_address(adev, address))
+ continue;
+
+ bps[i].size = AMDGPU_GPU_PAGE_SIZE;
+
+ status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
+ address);
+ if (status == -EBUSY)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
+ else if (status == -ENOENT)
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
+ else
+ bps[i].flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED;
+
+ s += scnprintf(&buf[s], element_size + 1,
+ "0x%08x : 0x%08x : %1s\n",
+ bps[i].bp,
+ bps[i].size,
+ amdgpu_ras_badpage_flags_str(bps[i].flags));
+ }
+
+ kfree(bps);
+
+ return s;
+}
+
+static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, features_attr);
+
+ return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
+}
+
+static bool amdgpu_ras_get_version_info(struct amdgpu_device *adev, u32 *major,
+ u32 *minor, u32 *rev)
+{
+ int i;
+
+ if (!adev || !major || !minor || !rev || !amdgpu_uniras_enabled(adev))
+ return false;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_RAS) {
+ *major = adev->ip_blocks[i].version->major;
+ *minor = adev->ip_blocks[i].version->minor;
+ *rev = adev->ip_blocks[i].version->rev;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, version_attr);
+ u32 major, minor, rev;
+ ssize_t size = 0;
+
+ size += sysfs_emit_at(buf, size, "table version: 0x%x\n",
+ con->eeprom_control.tbl_hdr.version);
+
+ if (amdgpu_ras_get_version_info(con->adev, &major, &minor, &rev))
+ size += sysfs_emit_at(buf, size, "ras version: %u.%u.%u\n",
+ major, minor, rev);
+
+ return size;
+}
+
+static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, schema_attr);
+ return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
+}
+
+static struct {
+ enum ras_event_type type;
+ const char *name;
+} dump_event[] = {
+ {RAS_EVENT_TYPE_FATAL, "Fatal Error"},
+ {RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"},
+ {RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"},
+};
+
+static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, event_state_attr);
+ struct ras_event_manager *event_mgr = con->event_mgr;
+ struct ras_event_state *event_state;
+ int i, size = 0;
+
+ if (!event_mgr)
+ return -EINVAL;
+
+ size += sysfs_emit_at(buf, size, "current seqno: %llu\n", atomic64_read(&event_mgr->seqno));
+ for (i = 0; i < ARRAY_SIZE(dump_event); i++) {
+ event_state = &event_mgr->event_state[dump_event[i].type];
+ size += sysfs_emit_at(buf, size, "%s: count:%llu, last_seqno:%llu\n",
+ dump_event[i].name,
+ atomic64_read(&event_state->count),
+ event_state->last_seqno);
+ }
+
+ return (ssize_t)size;
+}
+
+static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
+ &con->badpages_attr.attr,
+ RAS_FS_NAME);
+}
+
+static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct attribute *attrs[] = {
+ &con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
+ &con->event_state_attr.attr,
+ NULL
+ };
+ struct attribute_group group = {
+ .name = RAS_FS_NAME,
+ .attrs = attrs,
+ };
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_group(&adev->dev->kobj, &group);
+
+ return 0;
+}
+
+int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+ if (amdgpu_aca_is_enabled(adev))
+ return 0;
+
+ if (!obj || obj->attr_inuse)
+ return -EINVAL;
+
+ if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block))
+ return 0;
+
+ get_obj(obj);
+
+ snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
+ "%s_err_count", head->name);
+
+ obj->sysfs_attr = (struct device_attribute){
+ .attr = {
+ .name = obj->fs_data.sysfs_name,
+ .mode = S_IRUGO,
+ },
+ .show = amdgpu_ras_sysfs_read,
+ };
+ sysfs_attr_init(&obj->sysfs_attr.attr);
+
+ if (sysfs_add_file_to_group(&adev->dev->kobj,
+ &obj->sysfs_attr.attr,
+ RAS_FS_NAME)) {
+ put_obj(obj);
+ return -EINVAL;
+ }
+
+ obj->attr_inuse = 1;
+
+ return 0;
+}
+
+int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+ if (amdgpu_aca_is_enabled(adev))
+ return 0;
+
+ if (!obj || !obj->attr_inuse)
+ return -EINVAL;
+
+ if (adev->dev->kobj.sd)
+ sysfs_remove_file_from_group(&adev->dev->kobj,
+ &obj->sysfs_attr.attr,
+ RAS_FS_NAME);
+ obj->attr_inuse = 0;
+ put_obj(obj);
+
+ return 0;
+}
+
+static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj, *tmp;
+
+ list_for_each_entry_safe(obj, tmp, &con->head, node) {
+ amdgpu_ras_sysfs_remove(adev, &obj->head);
+ }
+
+ if (amdgpu_bad_page_threshold != 0)
+ amdgpu_ras_sysfs_remove_bad_page_node(adev);
+
+ amdgpu_ras_sysfs_remove_dev_attr_node(adev);
+
+ return 0;
+}
+/* sysfs end */
+
+/**
+ * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+ *
+ * Normally when there is an uncorrectable error, the driver will reset
+ * the GPU to recover. However, in the event of an unrecoverable error,
+ * the driver provides an interface to reboot the system automatically.
+ *
+ * The following file in debugfs provides that interface:
+ * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo true > .../ras/auto_reboot
+ *
+ */
+/* debugfs begin */
+static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control;
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *dir;
+
+ dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
+ debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_ctrl_ops);
+ debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_ops);
+ debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
+ &con->bad_page_cnt_threshold);
+ debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs);
+ debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
+ debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
+ debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_size_ops);
+ con->de_ras_eeprom_table = debugfs_create_file("ras_eeprom_table",
+ S_IRUGO, dir, adev,
+ &amdgpu_ras_debugfs_eeprom_table_ops);
+ amdgpu_ras_debugfs_set_ret_size(&con->eeprom_control);
+
+ /*
+ * After an uncorrectable error happens, GPU recovery will usually
+ * be scheduled. But due to the known problem of GPU recovery failing
+ * to bring the GPU back, the interface below provides a direct way for the
+ * user to reboot the system automatically in such a case, when an
+ * ERREVENT_ATHUB_INTERRUPT is generated. The normal GPU recovery routine
+ * will never be called in that case.
+ */
+ debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, dir, &con->reboot);
+
+ /*
+ * The user can set this so that the hardware's error count registers
+ * of the RAS IPs are not cleaned up during ras recovery.
+ */
+ debugfs_create_bool("disable_ras_err_cnt_harvest", 0644, dir,
+ &con->disable_ras_err_cnt_harvest);
+ return dir;
+}
+
+static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+ struct ras_fs_if *head,
+ struct dentry *dir)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
+
+ if (!obj || !dir)
+ return;
+
+ get_obj(obj);
+
+ memcpy(obj->fs_data.debugfs_name,
+ head->debugfs_name,
+ sizeof(obj->fs_data.debugfs_name));
+
+ debugfs_create_file(obj->fs_data.debugfs_name, S_IWUGO | S_IRUGO, dir,
+ obj, &amdgpu_ras_debugfs_ops);
+}
+
+static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
+{
+ bool ret;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ return ret;
+}
+
+void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct dentry *dir;
+ struct ras_manager *obj;
+ struct ras_fs_if fs_info;
+
+ /*
+ * this won't be called in the resume path, so there is no need to check
+ * suspend and gpu reset status
+ */
+ if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)
+ return;
+
+ dir = amdgpu_ras_debugfs_create_ctrl_node(adev);
+
+ list_for_each_entry(obj, &con->head, node) {
+ if (amdgpu_ras_is_supported(adev, obj->head.block) &&
+ (obj->attr_inuse == 1)) {
+ sprintf(fs_info.debugfs_name, "%s_err_inject",
+ get_ras_block_str(&obj->head));
+ fs_info.head = obj->head;
+ amdgpu_ras_debugfs_create(adev, &fs_info, dir);
+ }
+ }
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_smu_debugfs_init(adev, dir);
+ else
+ amdgpu_mca_smu_debugfs_init(adev, dir);
+ }
+}
+
+/* debugfs end */
+
+/* ras fs */
+static const BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
+ amdgpu_ras_sysfs_badpages_read, NULL, 0);
+static DEVICE_ATTR(features, S_IRUGO,
+ amdgpu_ras_sysfs_features_read, NULL);
+static DEVICE_ATTR(version, 0444,
+ amdgpu_ras_sysfs_version_show, NULL);
+static DEVICE_ATTR(schema, 0444,
+ amdgpu_ras_sysfs_schema_show, NULL);
+static DEVICE_ATTR(event_state, 0444,
+ amdgpu_ras_sysfs_event_state_show, NULL);
+static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct attribute_group group = {
+ .name = RAS_FS_NAME,
+ };
+ struct attribute *attrs[] = {
+ &con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
+ &con->event_state_attr.attr,
+ NULL
+ };
+ const struct bin_attribute *bin_attrs[] = {
+ NULL,
+ NULL,
+ };
+ int r;
+
+ group.attrs = attrs;
+
+ /* add features entry */
+ con->features_attr = dev_attr_features;
+ sysfs_attr_init(attrs[0]);
+
+ /* add version entry */
+ con->version_attr = dev_attr_version;
+ sysfs_attr_init(attrs[1]);
+
+ /* add schema entry */
+ con->schema_attr = dev_attr_schema;
+ sysfs_attr_init(attrs[2]);
+
+ /* add event_state entry */
+ con->event_state_attr = dev_attr_event_state;
+ sysfs_attr_init(attrs[3]);
+
+ if (amdgpu_bad_page_threshold != 0) {
+ /* add bad_page_features entry */
+ con->badpages_attr = bin_attr_gpu_vram_bad_pages;
+ sysfs_bin_attr_init(&con->badpages_attr);
+ bin_attrs[0] = &con->badpages_attr;
+ group.bin_attrs = bin_attrs;
+ }
+
+ r = sysfs_create_group(&adev->dev->kobj, &group);
+ if (r)
+ dev_err(adev->dev, "Failed to create RAS sysfs group!");
+
+ return 0;
+}
+
+static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *con_obj, *ip_obj, *tmp;
+
+ if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+ list_for_each_entry_safe(con_obj, tmp, &con->head, node) {
+ ip_obj = amdgpu_ras_find_obj(adev, &con_obj->head);
+ if (ip_obj)
+ put_obj(ip_obj);
+ }
+ }
+
+ amdgpu_ras_sysfs_remove_all(adev);
+ return 0;
+}
+/* ras fs end */
+
+/* ih begin */
+
+/* For the hardware that cannot enable bif ring for both ras_controller_irq
+ * and ras_err_event_athub_irq ih cookies, the driver has to poll the status
+ * register to check whether the interrupt is triggered or not, and properly
+ * ack the interrupt if it is there
+ */
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
+{
+ /* Fatal error events are handled on host side */
+ if (amdgpu_sriov_vf(adev))
+ return;
+ /*
+ * If the current interrupt is caused by a non-fatal RAS error, skip
+ * check for fatal error. For fatal errors, FED status of all devices
+ * in XGMI hive gets set when the first device gets fatal error
+ * interrupt. The error gets propagated to other devices as well, so
+ * make sure to ack the interrupt regardless of FED status.
+ */
+ if (!amdgpu_ras_get_fed_status(adev) &&
+ amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
+ return;
+
+ if (amdgpu_uniras_enabled(adev)) {
+ amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL);
+ return;
+ }
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
+}
+
+static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ bool poison_stat = false;
+ struct amdgpu_device *adev = obj->adev;
+ struct amdgpu_ras_block_object *block_obj =
+ amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int ret;
+
+ if (!block_obj || !con)
+ return;
+
+ ret = amdgpu_ras_mark_ras_event(adev, type);
+ if (ret)
+ return;
+
+ amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block);
+ /* both query_poison_status and handle_poison_consumption are optional,
+ * but at least one of them should be implemented if we need poison
+ * consumption handler
+ */
+ if (block_obj->hw_ops && block_obj->hw_ops->query_poison_status) {
+ poison_stat = block_obj->hw_ops->query_poison_status(adev);
+ if (!poison_stat) {
+ /* Not poison consumption interrupt, no need to handle it */
+ dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
+ block_obj->ras_comm.name);
+
+ return;
+ }
+ }
+
+ amdgpu_umc_poison_handler(adev, obj->head.block, 0);
+
+ if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
+ poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
+
+ /* gpu reset is fallback for failed and default cases.
+ * For RMA case, amdgpu_umc_poison_handler will handle gpu reset.
+ */
+ if (poison_stat && !amdgpu_ras_is_rma(adev)) {
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id,
+ "GPU reset for %s RAS poison consumption is issued!\n",
+ block_obj->ras_comm.name);
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ if (!poison_stat)
+ amdgpu_gfx_poison_consumption_handler(adev, entry);
+}
+
+static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_device *adev = obj->adev;
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
+ u64 event_id;
+ int ret;
+
+ ret = amdgpu_ras_mark_ras_event(adev, type);
+ if (ret)
+ return;
+
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+ RAS_EVENT_LOG(adev, event_id, "Poison is created\n");
+
+ if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
+
+ atomic_inc(&con->page_retirement_req_cnt);
+ atomic_inc(&con->poison_creation_count);
+
+ wake_up(&con->page_retirement_wq);
+ }
+}
+
+static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_ih_data *data = &obj->ih_data;
+ struct ras_err_data err_data;
+ int ret;
+
+ if (!data->cb)
+ return;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return;
+
+ /* Let the IP handle its data; maybe we need to get the output
+ * from the callback to update the error type/count, etc.
+ */
+ amdgpu_ras_set_fed(obj->adev, true);
+ ret = data->cb(obj->adev, &err_data, entry);
+ /* A UE will trigger an interrupt, and in that case
+ * we need to do a reset to recover the whole system.
+ * But leave the IP to do that recovery; here we just
+ * dispatch the error.
+ */
+ if (ret == AMDGPU_RAS_SUCCESS) {
+ /* these counts could be left as 0 if
+ * some blocks do not count error number
+ */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ obj->err_data.de_count += err_data.de_count;
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+}
+
+static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
+{
+ struct ras_ih_data *data = &obj->ih_data;
+ struct amdgpu_iv_entry entry;
+
+ while (data->rptr != data->wptr) {
+ rmb();
+ memcpy(&entry, &data->ring[data->rptr],
+ data->element_size);
+
+ wmb();
+ data->rptr = (data->aligned_element_size +
+ data->rptr) % data->ring_size;
+
+ if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
+ else
+ amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry);
+ } else {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_umc_handler(obj, &entry);
+ else
+ dev_warn(obj->adev->dev,
+ "No RAS interrupt handler for non-UMC block with poison disabled.\n");
+ }
+ }
+}
+
+static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
+{
+ struct ras_ih_data *data =
+ container_of(work, struct ras_ih_data, ih_work);
+ struct ras_manager *obj =
+ container_of(data, struct ras_manager, ih_data);
+
+ amdgpu_ras_interrupt_handler(obj);
+}
+
+int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
+ struct ras_dispatch_if *info)
+{
+ struct ras_manager *obj;
+ struct ras_ih_data *data;
+
+ if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info;
+
+ memset(&ih_info, 0, sizeof(ih_info));
+ ih_info.block = info->head.block;
+ memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry));
+
+ return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info);
+ }
+
+ obj = amdgpu_ras_find_obj(adev, &info->head);
+ if (!obj)
+ return -EINVAL;
+
+ data = &obj->ih_data;
+
+ if (data->inuse == 0)
+ return 0;
+
+ /* Might be overflow... */
+ memcpy(&data->ring[data->wptr], info->entry,
+ data->element_size);
+
+ wmb();
+ data->wptr = (data->aligned_element_size +
+ data->wptr) % data->ring_size;
+
+ schedule_work(&data->ih_work);
+
+ return 0;
+}
+
+int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ struct ras_ih_data *data;
+
+ if (!obj)
+ return -EINVAL;
+
+ data = &obj->ih_data;
+ if (data->inuse == 0)
+ return 0;
+
+ cancel_work_sync(&data->ih_work);
+
+ kfree(data->ring);
+ memset(data, 0, sizeof(*data));
+ put_obj(obj);
+
+ return 0;
+}
+
+int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
+ struct ras_common_if *head)
+{
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ struct ras_ih_data *data;
+ struct amdgpu_ras_block_object *ras_obj;
+
+ if (!obj) {
+ /* in case we register the IH before enabling the ras feature */
+ obj = amdgpu_ras_create_obj(adev, head);
+ if (!obj)
+ return -EINVAL;
+ } else
+ get_obj(obj);
+
+ ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm);
+
+ data = &obj->ih_data;
+ /* add the callback, etc. */
+ *data = (struct ras_ih_data) {
+ .inuse = 0,
+ .cb = ras_obj->ras_cb,
+ .element_size = sizeof(struct amdgpu_iv_entry),
+ .rptr = 0,
+ .wptr = 0,
+ };
+
+ INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler);
+
+ data->aligned_element_size = ALIGN(data->element_size, 8);
+ /* the ring can store 64 iv entries. */
+ data->ring_size = 64 * data->aligned_element_size;
+ data->ring = kmalloc(data->ring_size, GFP_KERNEL);
+ if (!data->ring) {
+ put_obj(obj);
+ return -ENOMEM;
+ }
+
+ /* IH is ready */
+ data->inuse = 1;
+
+ return 0;
+}
+
+static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj, *tmp;
+
+ list_for_each_entry_safe(obj, tmp, &con->head, node) {
+ amdgpu_ras_interrupt_remove_handler(adev, &obj->head);
+ }
+
+ return 0;
+}
+/* ih end */
+
+ /* traverse all IPs except NBIO to query error counters */
+static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev, enum ras_event_type type)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+
+ if (!adev->ras_enabled || !con)
+ return;
+
+ list_for_each_entry(obj, &con->head, node) {
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ /*
+ * The PCIE_BIF IP has a separate isr for the ras controller
+ * interrupt; the specific ras counter query is done in that
+ * isr. So skip this block in the common sync flood
+ * interrupt isr.
+ */
+ if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
+ continue;
+
+ /*
+ * This is a workaround for aldebaran: skip sending the
+ * message to smu to get the ecc_info table, because smu
+ * temporarily fails to handle that request.
+ * It should be removed once smu fixes ecc_info table handling.
+ */
+ if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) &&
+ (amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 2)))
+ continue;
+
+ amdgpu_ras_query_error_status_with_event(adev, &info, type);
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(11, 0, 2) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(11, 0, 4) &&
+ amdgpu_ip_version(adev, MP0_HWIP, 0) !=
+ IP_VERSION(13, 0, 0)) {
+ if (amdgpu_ras_reset_error_status(adev, info.head.block))
+ dev_warn(adev->dev, "Failed to reset error counter and error status");
+ }
+ }
+}
+
+/* Parse RdRspStatus and WrRspStatus */
+static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
+ struct ras_query_if *info)
+{
+ struct amdgpu_ras_block_object *block_obj;
+ /*
+ * Only two blocks need to query read/write
+ * RspStatus at the current state
+ */
+ if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) &&
+ (info->head.block != AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ block_obj = amdgpu_ras_get_ras_block(adev,
+ info->head.block,
+ info->head.sub_block_index);
+
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+ get_ras_block_str(&info->head));
+ return;
+ }
+
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
+
+}
+
+static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj;
+
+ if (!adev->ras_enabled || !con)
+ return;
+
+ list_for_each_entry(obj, &con->head, node) {
+ struct ras_query_if info = {
+ .head = obj->head,
+ };
+
+ amdgpu_ras_error_status_query(adev, &info);
+ }
+}
+
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ int r = 0;
+ uint32_t i;
+
+ if (!con || !con->eh_data || !bps || !count)
+ return -EINVAL;
+
+ mutex_lock(&con->recovery_lock);
+ data = con->eh_data;
+ if (start < data->count) {
+ for (i = start; i < data->count; i++) {
+ if (!data->bps[i].ts)
+ continue;
+
+ bps[r].bp = data->bps[i].retired_page;
+ r++;
+ if (r >= count)
+ break;
+ }
+ }
+ mutex_unlock(&con->recovery_lock);
+
+ return r;
+}
+
+static int amdgpu_uniras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage *bps, uint32_t count, uint32_t start)
+{
+ struct ras_cmd_bad_pages_info_req cmd_input;
+ struct ras_cmd_bad_pages_info_rsp *output;
+ uint32_t group, start_group, end_group;
+ uint32_t pos, pos_in_group;
+ int r = 0, i;
+
+ if (!bps || !count)
+ return -EINVAL;
+
+ output = kmalloc(sizeof(*output), GFP_KERNEL);
+ if (!output)
+ return -ENOMEM;
+
+ memset(&cmd_input, 0, sizeof(cmd_input));
+
+ start_group = start / RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ end_group = (start + count + RAS_CMD_MAX_BAD_PAGES_PER_GROUP - 1) /
+ RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+
+ pos = start;
+ for (group = start_group; group < end_group; group++) {
+ memset(output, 0, sizeof(*output));
+ cmd_input.group_index = group;
+ if (amdgpu_ras_mgr_handle_ras_cmd(adev, RAS_CMD__GET_BAD_PAGES,
+ &cmd_input, sizeof(cmd_input), output, sizeof(*output)))
+ goto out;
+
+ if (pos >= output->bp_total_cnt)
+ goto out;
+
+ pos_in_group = pos - group * RAS_CMD_MAX_BAD_PAGES_PER_GROUP;
+ for (i = pos_in_group; i < output->bp_in_group; i++, pos++) {
+ if (!output->records[i].ts)
+ continue;
+
+ bps[r].bp = output->records[i].retired_page;
+ r++;
+ if (r >= count)
+ goto out;
+ }
+ }
+
+out:
+ kfree(output);
+ return r;
+}
+
+static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive, bool status)
+{
+ struct amdgpu_device *tmp_adev;
+
+ if (hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ amdgpu_ras_set_fed(tmp_adev, status);
+ } else {
+ amdgpu_ras_set_fed(adev, status);
+ }
+}
+
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int hive_ras_recovery = 0;
+
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ if (ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
+ return true;
+
+ return false;
+}
+
+static enum ras_event_type amdgpu_ras_get_fatal_error_event(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_intr_triggered())
+ return RAS_EVENT_TYPE_FATAL;
+ else
+ return RAS_EVENT_TYPE_POISON_CONSUMPTION;
+}
+
+static void amdgpu_ras_do_recovery(struct work_struct *work)
+{
+ struct amdgpu_ras *ras =
+ container_of(work, struct amdgpu_ras, recovery_work);
+ struct amdgpu_device *remote_adev = NULL;
+ struct amdgpu_device *adev = ras->adev;
+ struct list_head device_list, *device_list_handle = NULL;
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ unsigned int error_query_mode;
+ enum ras_event_type type;
+
+ if (hive) {
+ atomic_set(&hive->ras_recovery, 1);
+
+ /* If any device which is part of the hive received RAS fatal
+ * error interrupt, set fatal error status on all. This
+ * condition will need a recovery, and flag will be cleared
+ * as part of recovery.
+ */
+ list_for_each_entry(remote_adev, &hive->device_list,
+ gmc.xgmi.head)
+ if (amdgpu_ras_get_fed_status(remote_adev)) {
+ amdgpu_ras_set_fed_all(adev, hive, true);
+ break;
+ }
+ }
+ if (!ras->disable_ras_err_cnt_harvest) {
+
+ /* Build list of devices to query RAS related errors */
+ if (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
+ device_list_handle = &hive->device_list;
+ } else {
+ INIT_LIST_HEAD(&device_list);
+ list_add_tail(&adev->gmc.xgmi.head, &device_list);
+ device_list_handle = &device_list;
+ }
+
+ if (amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) {
+ if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY) {
+ /* wait 500ms to ensure pmfw has finished polling mca bank info */
+ msleep(500);
+ }
+ }
+
+ type = amdgpu_ras_get_fatal_error_event(adev);
+ list_for_each_entry(remote_adev,
+ device_list_handle, gmc.xgmi.head) {
+ if (amdgpu_uniras_enabled(remote_adev)) {
+ amdgpu_ras_mgr_update_ras_ecc(remote_adev);
+ } else {
+ amdgpu_ras_query_err_status(remote_adev);
+ amdgpu_ras_log_on_err_counter(remote_adev, type);
+ }
+ }
+
+ }
+
+ if (amdgpu_device_should_recover_gpu(ras->adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_RAS;
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ /* Perform full reset in fatal error mode */
+ if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ else {
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ reset_context.method = AMD_RESET_METHOD_MODE2;
+ }
+
+ /* Fatal error occurs in poison mode, mode1 reset is used to
+ * recover gpu.
+ */
+ if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
+ ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ psp_fatal_error_recovery_quirk(&adev->psp);
+ }
+ }
+
+ amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
+ }
+ atomic_set(&ras->in_recovery, 0);
+ if (hive) {
+ atomic_set(&hive->ras_recovery, 0);
+ amdgpu_put_xgmi_hive(hive);
+ }
+}
+
+/* alloc/realloc bps array */
+static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
+ struct ras_err_handler_data *data, int pages)
+{
+ unsigned int old_space = data->count + data->space_left;
+ unsigned int new_space = old_space + pages;
+ unsigned int align_space = ALIGN(new_space, 512);
+ void *bps = kmalloc_array(align_space, sizeof(*data->bps), GFP_KERNEL);
+
+ if (!bps) {
+ return -ENOMEM;
+ }
+
+ if (data->bps) {
+ memcpy(bps, data->bps,
+ data->count * sizeof(*data->bps));
+ kfree(data->bps);
+ }
+
+ data->bps = bps;
+ data->space_left += align_space - old_space;
+ return 0;
+}
+
+static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
+{
+ struct ta_ras_query_address_input addr_in;
+ uint32_t socket = 0;
+ int ret = 0;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.socket_id = socket;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ /* tell RAS TA the node instance is not used */
+ addr_in.ma.node_inst = TA_RAS_INV_NODE;
+ } else {
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = bps->cu;
+ }
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+
+ return ret;
+}
+
+static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
+{
+ struct ta_ras_query_address_input addr_in;
+ uint32_t die_id, socket = 0;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
+
+ /* although the die id is derived from the PA in nps1 mode, the id
+ * is suitable for any nps mode
+ */
+ if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa)
+ die_id = adev->umc.ras->get_die_id_from_pa(adev, bps->address,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT);
+ else
+ return -EINVAL;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = die_id;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+ else
+ return -EINVAL;
+}
+
+static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int count)
+{
+ int j;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data = con->eh_data;
+
+ for (j = 0; j < count; j++) {
+ if (amdgpu_ras_check_bad_page_unlock(con,
+ bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
+ data->count++;
+ data->space_left--;
+ continue;
+ }
+
+ if (!data->space_left &&
+ amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
+ return -ENOMEM;
+ }
+
+ amdgpu_ras_reserve_page(adev, bps[j].retired_page);
+
+ memcpy(&data->bps[data->count], &(bps[j]),
+ sizeof(struct eeprom_table_record));
+ data->count++;
+ data->space_left--;
+ con->bad_page_num++;
+ }
+
+ return 0;
+}
+
+static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+
+ /* old asics just have pa in eeprom */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ goto out;
+ }
+
+ for (i = 0; i < adev->umc.retire_unit; i++)
+ bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+
+ if (save_nps) {
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ err_data->err_addr[i].address = bps[0].address;
+ err_data->err_addr[i].mem_channel = bps[0].mem_channel;
+ err_data->err_addr[i].bank = bps[0].bank;
+ err_data->err_addr[i].err_type = bps[0].err_type;
+ err_data->err_addr[i].mcumc_id = bps[0].mcumc_id;
+ }
+ } else {
+ if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
+ return -EINVAL;
+ }
+ } else {
+ if (bps[0].address == 0) {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps[0].address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+ }
+
+ if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) {
+ if (nps == AMDGPU_NPS1_PARTITION_MODE)
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ else
+ return -EOPNOTSUPP;
+ }
+ }
+
+out:
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit);
+}
+
+static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+ } else {
+ /* if pmfw manages eeprom, save_nps is not stored on eeprom,
+ * we should always convert mca address into physical address,
+ * make save_nps different from nps
+ */
+ save_nps = nps + 1;
+ }
+
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ err_data->err_addr[i].address = bps->address;
+ err_data->err_addr[i].mem_channel = bps->mem_channel;
+ err_data->err_addr[i].bank = bps->bank;
+ err_data->err_addr[i].err_type = bps->err_type;
+ err_data->err_addr[i].mcumc_id = bps->mcumc_id;
+ }
+ } else {
+ if (bps->address) {
+ if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
+ return -EINVAL;
+ } else {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps->address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+
+ if (amdgpu_ras_mca2pa(adev, bps, err_data))
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
+ adev->umc.retire_unit);
+}
+
+ /* it deals with vram only. */
+int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages, bool from_rom)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_data err_data;
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras_context.ras->eeprom_control;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ int ret = 0;
+ uint32_t i = 0;
+
+ if (!con || !con->eh_data || !bps || pages <= 0)
+ return 0;
+
+ if (from_rom) {
+ err_data.err_addr =
+ kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n");
+ return -ENOMEM;
+ }
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ }
+
+ mutex_lock(&con->recovery_lock);
+
+ if (from_rom) {
+ /* there are no pa recs in V3, so skip pa recs processing */
+ if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+ !amdgpu_ras_smu_eeprom_supported(adev)) {
+ for (i = 0; i < pages; i++) {
+ if (control->ras_num_recs - i >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ /* deal with retire_unit records at a time */
+ ret = __amdgpu_ras_convert_rec_array_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ con->bad_page_num -= adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ for (; i < pages; i++) {
+ ret = __amdgpu_ras_convert_rec_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ con->bad_page_num -= adev->umc.retire_unit;
+ }
+
+ con->eh_data->count_saved = con->eh_data->count;
+ } else {
+ ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
+ }
+
+ if (from_rom)
+ kfree(err_data.err_addr);
+ mutex_unlock(&con->recovery_lock);
+
+ return ret;
+}
+
+/*
+ * write error record array to eeprom, the function should be
+ * protected by recovery_lock
+ * new_cnt: new added UE count, excluding reserved bad pages, can be NULL
+ */
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ struct amdgpu_ras_eeprom_control *control;
+ int save_count, unit_num, i;
+
+ if (!con || !con->eh_data) {
+ if (new_cnt)
+ *new_cnt = 0;
+
+ return 0;
+ }
+
+ if (!con->eeprom_control.is_eeprom_valid) {
+ dev_warn(adev->dev,
+ "Failed to save EEPROM table data because of EEPROM data corruption!");
+ if (new_cnt)
+ *new_cnt = 0;
+
+ return 0;
+ }
+
+ mutex_lock(&con->recovery_lock);
+ control = &con->eeprom_control;
+ data = con->eh_data;
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ unit_num = control->ras_num_recs -
+ control->ras_num_recs_old;
+ else
+ unit_num = data->count / adev->umc.retire_unit -
+ control->ras_num_recs;
+
+ save_count = con->bad_page_num - control->ras_num_bad_pages;
+ mutex_unlock(&con->recovery_lock);
+
+ if (new_cnt)
+ *new_cnt = unit_num;
+
+ /* only new entries are saved */
+ if (unit_num && save_count) {
+ /* old asics only save pa to eeprom like before */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[data->count_saved], unit_num)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ } else {
+ for (i = 0; i < unit_num; i++) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[data->count_saved +
+ i * adev->umc.retire_unit], 1)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ }
+ }
+
+ dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
+ data->count_saved = data->count;
+ }
+
+ return 0;
+}
+
+/*
+ * read error record array in eeprom and reserve enough space for
+ * storing new bad pages
+ */
+static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras_context.ras->eeprom_control;
+ struct eeprom_table_record *bps;
+ int ret, i = 0;
+
+ /* no bad page record, skip eeprom access */
+ if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
+ return 0;
+
+ bps = kcalloc(control->ras_num_recs, sizeof(*bps), GFP_KERNEL);
+ if (!bps)
+ return -ENOMEM;
+
+ ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
+ if (ret) {
+ dev_err(adev->dev, "Failed to load EEPROM table records!");
+ } else {
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ /* In V3 there are no pa recs, and some cases (when address == 0) may be
+ * parsed as pa recs, so add a version check to avoid that.
+ */
+ if ((control->tbl_hdr.version < RAS_TABLE_VER_V3) &&
+ !amdgpu_ras_smu_eeprom_supported(adev)) {
+ for (i = 0; i < control->ras_num_recs; i++) {
+ if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ control->ras_num_pa_recs += adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ control->ras_num_mca_recs +=
+ (control->ras_num_recs - i);
+ break;
+ }
+ } else {
+ control->ras_num_mca_recs += (control->ras_num_recs - i);
+ break;
+ }
+ }
+ } else {
+ control->ras_num_mca_recs = control->ras_num_recs;
+ }
+ }
+
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true);
+ if (ret)
+ goto out;
+
+ ret = amdgpu_ras_eeprom_check(control);
+ if (ret)
+ goto out;
+
+ /* HW not usable */
+ if (amdgpu_ras_is_rma(adev))
+ ret = -EHWPOISON;
+ }
+
+out:
+ kfree(bps);
+ return ret;
+}
+
+static int amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+ uint64_t addr)
+{
+ struct ras_err_handler_data *data = con->eh_data;
+ struct amdgpu_device *adev = con->adev;
+ int i;
+
+ if ((addr >= adev->gmc.mc_vram_size &&
+ adev->gmc.mc_vram_size) ||
+ (addr >= RAS_UMC_INJECT_ADDR_LIMIT))
+ return -EINVAL;
+
+ addr >>= AMDGPU_GPU_PAGE_SHIFT;
+ for (i = 0; i < data->count; i++)
+ if (addr == data->bps[i].retired_page)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * check if an address belongs to bad page
+ *
+ * Note: this check is only for umc block
+ */
+static int amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (!con || !con->eh_data)
+ return ret;
+
+ mutex_lock(&con->recovery_lock);
+ ret = amdgpu_ras_check_bad_page_unlock(con, addr);
+ mutex_unlock(&con->recovery_lock);
+ return ret;
+}
+
+static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
+ uint32_t max_count)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ /*
+ * amdgpu_bad_page_threshold is used to configure
+ * the threshold for the number of bad pages.
+ * -1: Threshold is set to the default value.
+ * Driver will issue a warning message when the threshold is reached
+ * and continue runtime services.
+ * 0: Disable bad page retirement.
+ * Driver will not retire bad pages,
+ * which is intended for debugging purposes.
+ * -2: Threshold is determined by a formula
+ * that assumes 1 bad page per 100M of local memory.
+ * Driver will continue runtime services when the threshold is reached.
+ * 0 < threshold < max number of bad page records in EEPROM:
+ * A user-defined threshold is set.
+ * Driver will halt runtime services when this custom threshold is reached.
+ */
+ if (amdgpu_bad_page_threshold == -2) {
+ u64 val = adev->gmc.mc_vram_size;
+
+ do_div(val, RAS_BAD_PAGE_COVER);
+ con->bad_page_cnt_threshold = min(lower_32_bits(val),
+ max_count);
+ } else if (amdgpu_bad_page_threshold == -1) {
+ con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4;
+ } else {
+ con->bad_page_cnt_threshold = min_t(int, max_count,
+ amdgpu_bad_page_threshold);
+ }
+}
+
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ int ret = 0;
+ struct ras_poison_msg poison_msg;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ memset(&poison_msg, 0, sizeof(poison_msg));
+ poison_msg.block = block;
+ poison_msg.pasid = pasid;
+ poison_msg.reset = reset;
+ poison_msg.pasid_fn = pasid_fn;
+ poison_msg.data = data;
+
+ ret = kfifo_put(&con->poison_fifo, poison_msg);
+ if (!ret) {
+ dev_err(adev->dev, "Poison message fifo is full!\n");
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
+ struct ras_poison_msg *poison_msg)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ return kfifo_get(&con->poison_fifo, poison_msg);
+}
+
+static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
+{
+ mutex_init(&ecc_log->lock);
+
+ INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+}
+
+static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ struct ras_ecc_err *ecc_err;
+
+ mutex_lock(&ecc_log->lock);
+ radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) {
+ ecc_err = radix_tree_deref_slot(slot);
+ kfree(ecc_err->err_pages.pfn);
+ kfree(ecc_err);
+ radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot);
+ }
+ mutex_unlock(&ecc_log->lock);
+
+ mutex_destroy(&ecc_log->lock);
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+}
+
+static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con,
+ uint32_t delayed_ms)
+{
+ int ret;
+
+ mutex_lock(&con->umc_ecc_log.lock);
+ ret = radix_tree_tagged(&con->umc_ecc_log.de_page_tree,
+ UMC_ECC_NEW_DETECTED_TAG);
+ mutex_unlock(&con->umc_ecc_log.lock);
+
+ if (ret)
+ schedule_delayed_work(&con->page_retirement_dwork,
+ msecs_to_jiffies(delayed_ms));
+
+ return ret ? true : false;
+}
+
+static void amdgpu_ras_do_page_retirement(struct work_struct *work)
+{
+ struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
+ page_retirement_dwork.work);
+ struct amdgpu_device *adev = con->adev;
+ struct ras_err_data err_data;
+
+ /* If gpu reset is ongoing, delay retiring the bad pages */
+ if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) {
+ amdgpu_ras_schedule_retirement_dwork(con,
+ AMDGPU_RAS_RETIRE_PAGE_INTERVAL * 3);
+ return;
+ }
+
+ amdgpu_ras_error_data_init(&err_data);
+
+ amdgpu_umc_handle_bad_pages(adev, &err_data);
+
+ amdgpu_ras_error_data_fini(&err_data);
+
+ amdgpu_ras_schedule_retirement_dwork(con,
+ AMDGPU_RAS_RETIRE_PAGE_INTERVAL);
+}
+
+static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+ uint32_t poison_creation_count)
+{
+ int ret = 0;
+ struct ras_ecc_log_info *ecc_log;
+ struct ras_query_if info;
+ u32 timeout = MAX_UMC_POISON_POLLING_TIME_ASYNC;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ u64 de_queried_count;
+ u64 consumption_q_count;
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION;
+
+ memset(&info, 0, sizeof(info));
+ info.head.block = AMDGPU_RAS_BLOCK__UMC;
+
+ ecc_log = &ras->umc_ecc_log;
+ ecc_log->de_queried_count = 0;
+ ecc_log->consumption_q_count = 0;
+
+ do {
+ ret = amdgpu_ras_query_error_status_with_event(adev, &info, type);
+ if (ret)
+ return ret;
+
+ de_queried_count = ecc_log->de_queried_count;
+ consumption_q_count = ecc_log->consumption_q_count;
+
+ if (de_queried_count && consumption_q_count)
+ break;
+
+ msleep(100);
+ } while (--timeout);
+
+ if (de_queried_count)
+ schedule_delayed_work(&ras->page_retirement_dwork, 0);
+
+ if (amdgpu_ras_is_rma(adev) && atomic_cmpxchg(&ras->rma_in_recovery, 0, 1) == 0)
+ amdgpu_ras_reset_gpu(adev);
+
+ return 0;
+}
+
+static void amdgpu_ras_clear_poison_fifo(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_poison_msg msg;
+ int ret;
+
+ do {
+ ret = kfifo_get(&con->poison_fifo, &msg);
+ } while (ret);
+}
+
+static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ uint32_t msg_count, uint32_t *gpu_reset)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t reset_flags = 0, reset = 0;
+ struct ras_poison_msg msg;
+ int ret, i;
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+ for (i = 0; i < msg_count; i++) {
+ ret = amdgpu_ras_get_poison_req(adev, &msg);
+ if (!ret)
+ continue;
+
+ if (msg.pasid_fn)
+ msg.pasid_fn(adev, msg.pasid, msg.data);
+
+ reset_flags |= msg.reset;
+ }
+
+ /*
+ * Try to ensure the poison creation handler has completed first,
+ * so rma is set if the bad page count exceeds the threshold.
+ */
+ flush_delayed_work(&con->page_retirement_dwork);
+
+ /* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */
+ if (reset_flags && !amdgpu_ras_is_rma(adev)) {
+ if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET)
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ else
+ reset = reset_flags;
+
+ con->gpu_reset_flags |= reset;
+ amdgpu_ras_reset_gpu(adev);
+
+ *gpu_reset = reset;
+
+ /* Wait for gpu recovery to complete */
+ flush_work(&con->recovery_work);
+ }
+
+ return 0;
+}
+
+static int amdgpu_ras_page_retirement_thread(void *param)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)param;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t poison_creation_count, msg_count;
+ uint32_t gpu_reset;
+ int ret;
+
+ while (!kthread_should_stop()) {
+
+ wait_event_interruptible(con->page_retirement_wq,
+ kthread_should_stop() ||
+ atomic_read(&con->page_retirement_req_cnt));
+
+ if (kthread_should_stop())
+ break;
+
+ mutex_lock(&con->poison_lock);
+ gpu_reset = 0;
+
+ do {
+ poison_creation_count = atomic_read(&con->poison_creation_count);
+ ret = amdgpu_ras_poison_creation_handler(adev, poison_creation_count);
+ if (ret == -EIO)
+ break;
+
+ if (poison_creation_count) {
+ atomic_sub(poison_creation_count, &con->poison_creation_count);
+ atomic_sub(poison_creation_count, &con->page_retirement_req_cnt);
+ }
+ } while (atomic_read(&con->poison_creation_count) &&
+ !atomic_read(&con->poison_consumption_count));
+
+ if (ret != -EIO) {
+ msg_count = kfifo_len(&con->poison_fifo);
+ if (msg_count) {
+ ret = amdgpu_ras_poison_consumption_handler(adev,
+ msg_count, &gpu_reset);
+ if ((ret != -EIO) &&
+ (gpu_reset != AMDGPU_RAS_GPU_RESET_MODE1_RESET))
+ atomic_sub(msg_count, &con->page_retirement_req_cnt);
+ }
+ }
+
+ if ((ret == -EIO) || (gpu_reset == AMDGPU_RAS_GPU_RESET_MODE1_RESET)) {
+ /* gpu mode-1 reset is ongoing, or a ras mode-1 reset has just completed */
+ /* Clear poison creation request */
+ atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
+
+ /* Clear poison fifo */
+ amdgpu_ras_clear_poison_fifo(adev);
+
+ /* Clear all poison requests */
+ atomic_set(&con->page_retirement_req_cnt, 0);
+
+ if (ret == -EIO) {
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ /* Wake up work to save bad pages to eeprom */
+ schedule_delayed_work(&con->page_retirement_dwork, 0);
+ } else if (gpu_reset) {
+ /* gpu just completed mode-2 reset or other reset */
+ /* Clear poison consumption messages cached in fifo */
+ msg_count = kfifo_len(&con->poison_fifo);
+ if (msg_count) {
+ amdgpu_ras_clear_poison_fifo(adev);
+ atomic_sub(msg_count, &con->page_retirement_req_cnt);
+ }
+
+ atomic_set(&con->poison_consumption_count, 0);
+
+ /* Wake up work to save bad pages to eeprom */
+ schedule_delayed_work(&con->page_retirement_dwork, 0);
+ }
+ mutex_unlock(&con->poison_lock);
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control;
+ int ret;
+
+ if (!con || amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
+ control = &con->eeprom_control;
+ con->ras_smu_drv = amdgpu_dpm_get_ras_smu_driver(adev);
+
+ ret = amdgpu_ras_eeprom_init(control);
+ control->is_eeprom_valid = !ret;
+
+ if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+ control->ras_num_pa_recs = control->ras_num_recs;
+
+ if (adev->umc.ras &&
+ adev->umc.ras->get_retire_flip_bits)
+ adev->umc.ras->get_retire_flip_bits(adev);
+
+ if (control->ras_num_recs && control->is_eeprom_valid) {
+ ret = amdgpu_ras_load_bad_pages(adev);
+ if (ret) {
+ control->is_eeprom_valid = false;
+ return 0;
+ }
+
+ amdgpu_dpm_send_hbm_bad_pages_num(
+ adev, control->ras_num_bad_pages);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(
+ adev, control->bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
+
+ /* The format action is only applied to new ASICs */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 &&
+ control->tbl_hdr.version < RAS_TABLE_VER_V3)
+ if (!amdgpu_ras_eeprom_reset_table(control))
+ if (amdgpu_ras_save_bad_pages(adev, NULL))
+ dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n");
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data **data;
+ u32 max_eeprom_records_count = 0;
+ int ret;
+
+ if (!con || amdgpu_sriov_vf(adev))
+ return 0;
+
+ /* Allow access to the RAS EEPROM via debugfs when the ASIC
+ * supports RAS and debugfs is enabled, even when
+ * adev->ras_enabled is unset, i.e. when the "ras_enable"
+ * module parameter is set to 0.
+ */
+ con->adev = adev;
+
+ if (!adev->ras_enabled)
+ return 0;
+
+ data = &con->eh_data;
+ *data = kzalloc(sizeof(**data), GFP_KERNEL);
+ if (!*data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mutex_init(&con->recovery_lock);
+ mutex_init(&con->poison_lock);
+ INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
+ atomic_set(&con->in_recovery, 0);
+ atomic_set(&con->rma_in_recovery, 0);
+ con->eeprom_control.bad_channel_bitmap = 0;
+
+ max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
+ amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
+
+ if (init_bp_info) {
+ ret = amdgpu_ras_init_badpage_info(adev);
+ if (ret)
+ goto free;
+ }
+
+ mutex_init(&con->page_rsv_lock);
+ INIT_KFIFO(con->poison_fifo);
+ mutex_init(&con->page_retirement_lock);
+ init_waitqueue_head(&con->page_retirement_wq);
+ atomic_set(&con->page_retirement_req_cnt, 0);
+ atomic_set(&con->poison_creation_count, 0);
+ atomic_set(&con->poison_consumption_count, 0);
+ con->page_retirement_thread =
+ kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
+ if (IS_ERR(con->page_retirement_thread)) {
+ con->page_retirement_thread = NULL;
+ dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
+ }
+
+ INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement);
+ amdgpu_ras_ecc_log_init(&con->umc_ecc_log);
+#ifdef CONFIG_X86_MCE_AMD
+ if ((adev->asic_type == CHIP_ALDEBARAN) &&
+ (adev->gmc.xgmi.connected_to_cpu))
+ amdgpu_register_bad_pages_mca_notifier(adev);
+#endif
+ return 0;
+
+free:
+ kfree((*data)->bps);
+ kfree(*data);
+ con->eh_data = NULL;
+out:
+ dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret);
+
+ /*
+ * Except for the error-threshold-exceeded case, other failure cases in
+ * this function do not fail amdgpu driver init.
+ */
+ if (!amdgpu_ras_is_rma(adev))
+ ret = 0;
+ else
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data = con->eh_data;
+ int max_flush_timeout = MAX_FLUSH_RETIRE_DWORK_TIMES;
+ bool ret;
+
+ /* recovery_init failed to init it, fini is useless */
+ if (!data)
+ return 0;
+
+ /* Save all cached bad pages to eeprom */
+ do {
+ flush_delayed_work(&con->page_retirement_dwork);
+ ret = amdgpu_ras_schedule_retirement_dwork(con, 0);
+ } while (ret && max_flush_timeout--);
+
+ if (con->page_retirement_thread)
+ kthread_stop(con->page_retirement_thread);
+
+ atomic_set(&con->page_retirement_req_cnt, 0);
+ atomic_set(&con->poison_creation_count, 0);
+
+ mutex_destroy(&con->page_rsv_lock);
+
+ cancel_work_sync(&con->recovery_work);
+
+ cancel_delayed_work_sync(&con->page_retirement_dwork);
+
+ amdgpu_ras_ecc_log_fini(&con->umc_ecc_log);
+
+ mutex_lock(&con->recovery_lock);
+ con->eh_data = NULL;
+ kfree(data->bps);
+ kfree(data);
+ mutex_unlock(&con->recovery_lock);
+
+ amdgpu_ras_critical_region_init(adev);
+#ifdef CONFIG_X86_MCE_AMD
+ amdgpu_unregister_bad_pages_mca_notifier(adev);
+#endif
+ return 0;
+}
+/* recovery end */
+
+static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ if (adev->asic_type == CHIP_IP_DISCOVERY) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 3):
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return adev->asic_type == CHIP_VEGA10 ||
+ adev->asic_type == CHIP_VEGA20 ||
+ adev->asic_type == CHIP_ARCTURUS ||
+ adev->asic_type == CHIP_ALDEBARAN ||
+ adev->asic_type == CHIP_SIENNA_CICHLID;
+}
+
+/*
+ * this is workaround for vega20 workstation sku,
+ * force enable gfx ras, ignore vbios gfx ras flag
+ * due to GC EDC can not write
+ */
+static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ if (!ctx)
+ return;
+
+ if (strnstr(ctx->vbios_pn, "D16406",
+ sizeof(ctx->vbios_pn)) ||
+ strnstr(ctx->vbios_pn, "D36002",
+ sizeof(ctx->vbios_pn)))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
+}
+
+ /* Query ras capability via the atomfirmware interface */
+static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev)
+{
+ /* mem_ecc cap */
+ if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
+ dev_info(adev->dev, "MEM ECC is active.\n");
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ } else {
+ dev_info(adev->dev, "MEM ECC is not presented.\n");
+ }
+
+ /* sram_ecc cap */
+ if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
+ dev_info(adev->dev, "SRAM ECC is active.\n");
+ if (!amdgpu_sriov_vf(adev))
+ adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
+ else
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
+ 1 << AMDGPU_RAS_BLOCK__SDMA |
+ 1 << AMDGPU_RAS_BLOCK__GFX);
+
+ /*
+ * VCN/JPEG RAS can be supported on both bare metal and
+ * SRIOV environment
+ */
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(5, 0, 1))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+ else
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+
+ /*
+ * XGMI RAS is not supported if xgmi num physical nodes
+ * is zero
+ */
+ if (!adev->gmc.xgmi.num_physical_nodes)
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL);
+ } else {
+ dev_info(adev->dev, "SRAM ECC is not presented.\n");
+ }
+}
+
+/* Query poison mode from umc/df IP callbacks */
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ bool df_poison, umc_poison;
+
+ /* poison setting is useless on SRIOV guest */
+ if (amdgpu_sriov_vf(adev) || !con)
+ return;
+
+ /* Init poison supported flag, the default value is false */
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
+ /* enabled by default when GPU is connected to CPU */
+ con->poison_supported = true;
+ } else if (adev->df.funcs &&
+ adev->df.funcs->query_ras_poison_mode &&
+ adev->umc.ras &&
+ adev->umc.ras->query_ras_poison_mode) {
+ df_poison =
+ adev->df.funcs->query_ras_poison_mode(adev);
+ umc_poison =
+ adev->umc.ras->query_ras_poison_mode(adev);
+
+ /* Only when poison is set in both DF and UMC can we support it */
+ if (df_poison && umc_poison)
+ con->poison_supported = true;
+ else if (df_poison != umc_poison)
+ dev_warn(adev->dev,
+ "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+ df_poison, umc_poison);
+ }
+}
+
+/*
+ * check hardware's ras ability which will be saved in hw_supported.
+ * if hardware does not support ras, we can skip some ras initializtion and
+ * forbid some ras operations from IP.
+ * if software itself, say boot parameter, limit the ras ability. We still
+ * need allow IP do some limited operations, like disable. In such case,
+ * we have to initialize ras as normal. but need check if operation is
+ * allowed or not in each function.
+ */
+static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
+{
+ adev->ras_hw_enabled = adev->ras_enabled = 0;
+
+ if (!amdgpu_ras_asic_supported(adev))
+ return;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_get_ras_capability(adev))
+ goto init_ras_enabled_flag;
+ }
+
+ /* query ras capability from psp */
+ if (amdgpu_psp_get_ras_capability(&adev->psp))
+ goto init_ras_enabled_flag;
+
+ /* query ras capability from bios */
+ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
+ amdgpu_ras_query_ras_capablity_from_vbios(adev);
+ } else {
+ /* the driver only manages the RAS features of a few IP blocks
+ * when the GPU is connected to the cpu through XGMI */
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
+ 1 << AMDGPU_RAS_BLOCK__SDMA |
+ 1 << AMDGPU_RAS_BLOCK__MMHUB);
+ }
+
+ /* apply asic specific settings (vega20 only for now) */
+ amdgpu_ras_get_quirks(adev);
+
+ /* query poison mode from umc/df ip callback */
+ amdgpu_ras_query_poison_mode(adev);
+
+init_ras_enabled_flag:
+ /* hw_supported needs to be aligned with RAS block mask. */
+ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
+
+ adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
+ adev->ras_hw_enabled & amdgpu_ras_mask;
+
+ /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->aca.is_enabled =
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));
+ }
+
+ /* bad page feature is not applicable to specific app platform */
+ if (adev->gmc.is_app_apu &&
+ amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(12, 0, 0))
+ amdgpu_bad_page_threshold = 0;
+}
+
+static void amdgpu_ras_counte_dw(struct work_struct *work)
+{
+ struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
+ ras_counte_delay_work.work);
+ struct amdgpu_device *adev = con->adev;
+ struct drm_device *dev = adev_to_drm(adev);
+ unsigned long ce_count, ue_count;
+ int res;
+
+ res = pm_runtime_get_sync(dev->dev);
+ if (res < 0)
+ goto Out;
+
+ /* Cache new values.
+ */
+ if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL) == 0) {
+ atomic_set(&con->ras_ce_count, ce_count);
+ atomic_set(&con->ras_ue_count, ue_count);
+ }
+
+Out:
+ pm_runtime_put_autosuspend(dev->dev);
+}
+
+static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
+{
+ return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 |
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE |
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE |
+ AMDGPU_RAS_ERROR__PARITY;
+}
+
+static void ras_event_mgr_init(struct ras_event_manager *mgr)
+{
+ struct ras_event_state *event_state;
+ int i;
+
+ memset(mgr, 0, sizeof(*mgr));
+ atomic64_set(&mgr->seqno, 0);
+
+ for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) {
+ event_state = &mgr->event_state[i];
+ event_state->last_seqno = RAS_EVENT_INVALID_ID;
+ atomic64_set(&event_state->count, 0);
+ }
+}
+
+static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_hive_info *hive;
+
+ if (!ras)
+ return;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr;
+
+ /* init event manager with node 0 on xgmi system */
+ if (!amdgpu_reset_in_recovery(adev)) {
+ if (!hive || adev->gmc.xgmi.node_id == 0)
+ ras_event_mgr_init(ras->event_mgr);
+ }
+
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+}
+
+static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con || (adev->flags & AMD_IS_APU))
+ return;
+
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT;
+ break;
+ case IP_VERSION(13, 0, 14):
+ con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1);
+ break;
+ default:
+ break;
+ }
+}
+
+int amdgpu_ras_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int r;
+
+ if (con)
+ return 0;
+
+ con = kzalloc(sizeof(*con) +
+ sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
+ sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
+ GFP_KERNEL);
+ if (!con)
+ return -ENOMEM;
+
+ con->adev = adev;
+ INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
+ atomic_set(&con->ras_ce_count, 0);
+ atomic_set(&con->ras_ue_count, 0);
+
+ con->objs = (struct ras_manager *)(con + 1);
+
+ amdgpu_ras_set_context(adev, con);
+
+ amdgpu_ras_check_supported(adev);
+
+ if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
+ /* set the gfx block ras context feature for VEGA20 Gaming;
+ * send a ras disable cmd to the ras ta during ras late init.
+ */
+ if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
+ con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
+
+ return 0;
+ }
+
+ r = 0;
+ goto release_con;
+ }
+
+ con->update_channel_flag = false;
+ con->features = 0;
+ con->schema = 0;
+ INIT_LIST_HEAD(&con->head);
+ /* Might need to get this flag from vbios. */
+ con->flags = RAS_DEFAULT_FLAGS;
+
+ /* initialize nbio ras function ahead of any other
+ * ras functions so hardware fatal error interrupt
+ * can be enabled as early as possible */
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 4, 0):
+ case IP_VERSION(7, 4, 1):
+ case IP_VERSION(7, 4, 4):
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->nbio.ras = &nbio_v7_4_ras;
+ break;
+ case IP_VERSION(4, 3, 0):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generations of nbio ras,
+ * nbio v4_3 only supports a fatal error interrupt
+ * to inform software that the DF is frozen due to
+ * a system fatal error event. The driver should not
+ * enable nbio ras in such a case. Instead,
+ * check DF RAS */
+ adev->nbio.ras = &nbio_v4_3_ras;
+ break;
+ case IP_VERSION(6, 3, 1):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generations of nbio ras,
+ * nbif v6_3_1 only supports a fatal error interrupt
+ * to inform software that the DF is frozen due to
+ * a system fatal error event. The driver should not
+ * enable nbio ras in such a case. Instead,
+ * check DF RAS
+ */
+ adev->nbio.ras = &nbif_v6_3_1_ras;
+ break;
+ case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
+ if (!adev->gmc.is_app_apu)
+ adev->nbio.ras = &nbio_v7_9_ras;
+ break;
+ default:
+ /* nbio ras is not available */
+ break;
+ }
+
+ /* nbio ras block needs to be enabled ahead of other ras blocks
+ * to handle fatal error */
+ r = amdgpu_nbio_ras_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_controller_interrupt) {
+ r = adev->nbio.ras->init_ras_controller_interrupt(adev);
+ if (r)
+ goto release_con;
+ }
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->init_ras_err_event_athub_interrupt) {
+ r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
+ if (r)
+ goto release_con;
+ }
+
+ /* Pack socket_id into ras feature mask bits[31:29] */
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id)
+ con->features |= ((adev->smuio.funcs->get_socket_id(adev)) <<
+ AMDGPU_RAS_FEATURES_SOCKETID_SHIFT);
+
+ /* Get RAS schema for particular SOC */
+ con->schema = amdgpu_get_ras_schema(adev);
+
+ amdgpu_ras_init_reserved_vram_size(adev);
+
+ if (amdgpu_ras_fs_init(adev)) {
+ r = -EINVAL;
+ goto release_con;
+ }
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ r = amdgpu_aca_init(adev);
+ else
+ r = amdgpu_mca_init(adev);
+ if (r)
+ goto release_con;
+ }
+
+ con->init_task_pid = task_pid_nr(current);
+ get_task_comm(con->init_task_comm, current);
+
+ mutex_init(&con->critical_region_lock);
+ INIT_LIST_HEAD(&con->critical_region_head);
+
+ dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
+ "hardware ability[%x] ras_mask[%x]\n",
+ adev->ras_hw_enabled, adev->ras_enabled);
+
+ return 0;
+release_con:
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+
+ return r;
+}
+
+int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev)
+{
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu)
+ return 1;
+ return 0;
+}
+
+static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ struct ras_query_if info = {
+ .head = *ras_block,
+ };
+
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ return 0;
+
+ if (amdgpu_ras_query_error_status(adev, &info) != 0)
+ DRM_WARN("RAS init harvest failure");
+
+ if (amdgpu_ras_reset_error_status(adev, ras_block->block) != 0)
+ DRM_WARN("RAS init harvest reset failure");
+
+ return 0;
+}
+
+bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con)
+ return false;
+
+ return con->poison_supported;
+}
+
+/* helper function to handle common stuff in ip late init phase */
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ struct amdgpu_ras_block_object *ras_obj = NULL;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_query_if *query_info;
+ unsigned long ue_count, ce_count;
+ int r;
+
+ /* disable RAS feature per IP block if it is not supported */
+ if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
+ return 0;
+ }
+
+ r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
+ if (r) {
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) {
+ /* in the resume phase, if we fail to enable ras,
+ * clean up all ras fs nodes and disable ras */
+ goto cleanup;
+ } else
+ return r;
+ }
+
+ /* check for errors on warm reset for ASICs that support persistent EDC */
+ amdgpu_persistent_edc_harvesting(adev, ras_block);
+
+ /* in resume phase, no need to create ras fs node */
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
+ return 0;
+
+ ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+ if (ras_obj->ras_cb || (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_poison_status ||
+ ras_obj->hw_ops->handle_poison_consumption))) {
+ r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
+ if (r)
+ goto cleanup;
+ }
+
+ if (ras_obj->hw_ops &&
+ (ras_obj->hw_ops->query_ras_error_count ||
+ ras_obj->hw_ops->query_ras_error_status)) {
+ r = amdgpu_ras_sysfs_create(adev, ras_block);
+ if (r)
+ goto interrupt;
+
+ /* These are the cached values at init. */
+ query_info = kzalloc(sizeof(*query_info), GFP_KERNEL);
+ if (!query_info)
+ return -ENOMEM;
+ memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
+
+ if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
+ atomic_set(&con->ras_ce_count, ce_count);
+ atomic_set(&con->ras_ue_count, ue_count);
+ }
+
+ kfree(query_info);
+ }
+
+ return 0;
+
+interrupt:
+ if (ras_obj->ras_cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ras_block);
+cleanup:
+ amdgpu_ras_feature_enable(adev, ras_block, 0);
+ return r;
+}
+
+static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ return amdgpu_ras_block_late_init(adev, ras_block);
+}
+
+/* helper function to remove ras fs node and interrupt handler */
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ struct amdgpu_ras_block_object *ras_obj;
+
+ if (!ras_block)
+ return;
+
+ amdgpu_ras_sysfs_remove(adev, ras_block);
+
+ ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
+ if (ras_obj->ras_cb)
+ amdgpu_ras_interrupt_remove_handler(adev, ras_block);
+}
+
+static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ return amdgpu_ras_block_late_fini(adev, ras_block);
+}
+
+/* Do some init work after IP late init, which it depends on.
+ * This runs in the resume, gpu reset and boot-up cases.
+ */
+void amdgpu_ras_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_manager *obj, *tmp;
+
+ if (!adev->ras_enabled || !con) {
+ /* clean up the ras context for VEGA20 Gaming after sending the ras disable cmd */
+ amdgpu_release_ras_context(adev);
+
+ return;
+ }
+
+ if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
+ /* Set up all other IPs, which are not implemented. One tricky
+ * point: an IP's actual ras error type should be
+ * MULTI_UNCORRECTABLE, but since the driver does not handle it,
+ * ERROR_NONE makes sense anyway.
+ */
+ amdgpu_ras_enable_all_features(adev, 1);
+
+ /* We enable ras on all hw_supported blocks, but the boot
+ * parameter might disable some of them, and one or more IPs may
+ * not be implemented yet. So we disable those on their behalf.
+ */
+ list_for_each_entry_safe(obj, tmp, &con->head, node) {
+ if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
+ amdgpu_ras_feature_enable(adev, &obj->head, 0);
+ /* there should not be any reference. */
+ WARN_ON(alive_obj(obj));
+ }
+ }
+ }
+}
+
+void amdgpu_ras_suspend(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!adev->ras_enabled || !con)
+ return;
+
+ amdgpu_ras_disable_all_features(adev, 0);
+ /* Make sure all ras objects are disabled. */
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
+ amdgpu_ras_disable_all_features(adev, 1);
+}
+
+int amdgpu_ras_late_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_block_list *node, *tmp;
+ struct amdgpu_ras_block_object *obj;
+ int r;
+
+ amdgpu_ras_event_mgr_init(adev);
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_reset_in_recovery(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ r = amdgpu_aca_reset(adev);
+ else
+ r = amdgpu_mca_reset(adev);
+ if (r)
+ return r;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_ras_set_aca_debug_mode(adev, false);
+ else
+ amdgpu_ras_set_mca_debug_mode(adev, false);
+ }
+ }
+
+ /* The guest side does not need to init the ras feature */
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_telemetry_en(adev))
+ return 0;
+
+ list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+ obj = node->ras_obj;
+ if (!obj) {
+ dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+ continue;
+ }
+
+ if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block))
+ continue;
+
+ if (obj->ras_late_init) {
+ r = obj->ras_late_init(adev, &obj->ras_comm);
+ if (r) {
+ dev_err(adev->dev, "%s failed to execute ras_late_init! ret:%d\n",
+ obj->ras_comm.name, r);
+ return r;
+ }
+ } else
+ amdgpu_ras_block_late_init_default(adev, &obj->ras_comm);
+ }
+
+ return 0;
+}
+
+/* Do some fini work before IP fini, as a dependency */
+int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!adev->ras_enabled || !con)
+ return 0;
+
+ /* Need to disable ras on all IPs here before ip [hw/sw]fini */
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
+ amdgpu_ras_disable_all_features(adev, 0);
+ amdgpu_ras_recovery_fini(adev);
+ return 0;
+}
+
+int amdgpu_ras_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras_block_list *ras_node, *tmp;
+ struct amdgpu_ras_block_object *obj = NULL;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!adev->ras_enabled || !con)
+ return 0;
+
+ amdgpu_ras_critical_region_fini(adev);
+ mutex_destroy(&con->critical_region_lock);
+
+ list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
+ if (ras_node->ras_obj) {
+ obj = ras_node->ras_obj;
+ if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) &&
+ obj->ras_fini)
+ obj->ras_fini(adev, &obj->ras_comm);
+ else
+ amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm);
+ }
+
+ /* Clear ras blocks from ras_list and free ras block list node */
+ list_del(&ras_node->node);
+ kfree(ras_node);
+ }
+
+ amdgpu_ras_fs_fini(adev);
+ amdgpu_ras_interrupt_remove_all(adev);
+
+ if (amdgpu_ras_aca_is_supported(adev)) {
+ if (amdgpu_aca_is_enabled(adev))
+ amdgpu_aca_fini(adev);
+ else
+ amdgpu_mca_fini(adev);
+ }
+
+ WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
+
+ if (AMDGPU_RAS_GET_FEATURES(con->features))
+ amdgpu_ras_disable_all_features(adev, 0);
+
+ cancel_delayed_work_sync(&con->ras_counte_delay_work);
+
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+
+ return 0;
+}
+
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (!ras)
+ return false;
+
+ return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+}
+
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (status)
+ set_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ else
+ clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ }
+}
+
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ ras->ras_err_state = 0;
+ ras->gpu_reset_flags = 0;
+ }
+}
+
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras)
+ set_bit(block, &ras->ras_err_state);
+}
+
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (block == AMDGPU_RAS_BLOCK__ANY)
+ return (ras->ras_err_state != 0);
+ else
+ return test_bit(block, &ras->ras_err_state) ||
+ test_bit(AMDGPU_RAS_BLOCK__LAST,
+ &ras->ras_err_state);
+ }
+
+ return false;
+}
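+
+/*
+ * Illustrative usage sketch for the err_state helpers above (the call
+ * sites are assumptions for illustration, not taken from this file):
+ *
+ *	amdgpu_ras_set_err_poison(adev, AMDGPU_RAS_BLOCK__UMC);
+ *	...
+ *	if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY)) {
+ *		// some block is poisoned, or a fatal error was flagged
+ *		// via the AMDGPU_RAS_BLOCK__LAST bit (see set_fed above)
+ *	}
+ *
+ * amdgpu_ras_clear_err_state() resets both ras_err_state and
+ * gpu_reset_flags, e.g. once recovery has completed.
+ */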
+
+static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (!ras)
+ return NULL;
+
+ return ras->event_mgr;
+}
+
+int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
+ const void *caller)
+{
+ struct ras_event_manager *event_mgr;
+ struct ras_event_state *event_state;
+ int ret = 0;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
+ if (type >= RAS_EVENT_TYPE_COUNT) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ event_mgr = __get_ras_event_mgr(adev);
+ if (!event_mgr) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ event_state = &event_mgr->event_state[type];
+ event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno);
+ atomic64_inc(&event_state->count);
+
+out:
+ if (ret && caller)
+ dev_warn(adev->dev, "failed mark ras event (%d) in %ps, ret:%d\n",
+ (int)type, caller, ret);
+
+ return ret;
+}
+
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type)
+{
+ struct ras_event_manager *event_mgr;
+ u64 id;
+
+ if (type >= RAS_EVENT_TYPE_COUNT)
+ return RAS_EVENT_INVALID_ID;
+
+ switch (type) {
+ case RAS_EVENT_TYPE_FATAL:
+ case RAS_EVENT_TYPE_POISON_CREATION:
+ case RAS_EVENT_TYPE_POISON_CONSUMPTION:
+ event_mgr = __get_ras_event_mgr(adev);
+ if (!event_mgr)
+ return RAS_EVENT_INVALID_ID;
+
+ id = event_mgr->event_state[type].last_seqno;
+ break;
+ case RAS_EVENT_TYPE_INVALID:
+ default:
+ id = RAS_EVENT_INVALID_ID;
+ break;
+ }
+
+ return id;
+}
+
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+{
+ if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ enum ras_event_type type = RAS_EVENT_TYPE_FATAL;
+ u64 event_id = RAS_EVENT_INVALID_ID;
+
+ if (amdgpu_uniras_enabled(adev))
+ return 0;
+
+ if (!amdgpu_ras_mark_ras_event(adev, type))
+ event_id = amdgpu_ras_acquire_event_id(adev, type);
+
+ RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error "
+ "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+
+ amdgpu_ras_set_fed(adev, true);
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ return -EBUSY;
+}
+
+bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
+{
+ if (adev->asic_type == CHIP_VEGA20 &&
+ adev->pm.fw_version <= 0x283400) {
+ return !(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) &&
+ amdgpu_ras_intr_triggered();
+ }
+
+ return false;
+}
+
+void amdgpu_release_ras_context(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con)
+ return;
+
+ if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
+ con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+ }
+}
+
+#ifdef CONFIG_X86_MCE_AMD
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+ int i;
+ struct amdgpu_device *adev = NULL;
+
+ for (i = 0; i < mce_adev_list.num_gpu; i++) {
+ adev = mce_adev_list.devs[i];
+
+ if (adev && adev->gmc.xgmi.connected_to_cpu &&
+ adev->gmc.xgmi.physical_node_id == node_id)
+ break;
+ adev = NULL;
+ }
+
+ return adev;
+}
+
+#define GET_MCA_IPID_GPUID(m) (((m) >> 44) & 0xF)
+#define GET_UMC_INST(m) (((m) >> 21) & 0x7)
+#define GET_CHAN_INDEX(m) ((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
+#define GPU_ID_OFFSET 8
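+
+/*
+ * Decoding sketch for the helpers above (the register value is an
+ * assumption chosen for illustration): with MCA_IPID bits [47:44] = 0x9,
+ * GET_MCA_IPID_GPUID() returns 9 and the GPU id becomes
+ * 9 - GPU_ID_OFFSET = 1; bits [23:21] = 0x3 give umc_inst = 3; the
+ * channel index takes bits [13:12] as its low two bits and bit 20 as
+ * bit 2, so bits [13:12] = 0b10 with bit 20 set yields ch_inst = 6.
+ */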
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct mce *m = (struct mce *)data;
+ struct amdgpu_device *adev = NULL;
+ uint32_t gpu_id = 0;
+ uint32_t umc_inst = 0, ch_inst = 0;
+
+ /*
+ * If the error was generated in UMC_V2, which belongs to GPU UMCs,
+ * and the error occurred in DramECC (Extended error code = 0), then
+ * process the error; otherwise bail out.
+ */
+ if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) &&
+ (XEC(m->status, 0x3f) == 0x0)))
+ return NOTIFY_DONE;
+
+ /*
+ * If it is a correctable error, return.
+ */
+ if (mce_is_correctable(m))
+ return NOTIFY_OK;
+
+ /*
+ * The GPU id is offset by GPU_ID_OFFSET in the MCA_IPID_UMC register.
+ */
+ gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
+
+ adev = find_adev(gpu_id);
+ if (!adev) {
+ DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
+ gpu_id);
+ return NOTIFY_DONE;
+ }
+
+ /*
+ * If it is an uncorrectable error, find out the UMC instance and
+ * channel index.
+ */
+ umc_inst = GET_UMC_INST(m->ipid);
+ ch_inst = GET_CHAN_INDEX(m->ipid);
+
+ dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
+ umc_inst, ch_inst);
+
+ if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
+ return NOTIFY_OK;
+ else
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block amdgpu_bad_page_nb = {
+ .notifier_call = amdgpu_bad_page_notifier,
+ .priority = MCE_PRIO_UC,
+};
+
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+ /*
+ * Add the adev to the mce_adev_list.
+ * During mode2 reset, the amdgpu device is temporarily
+ * removed from the mgpu_info list, which can cause
+ * page retirement to fail.
+ * Use this list instead of mgpu_info to find the amdgpu
+ * device on which the UMC error was reported.
+ */
+ mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;
+
+ /*
+ * Register the x86 notifier only once
+ * with the MCE subsystem.
+ */
+ if (notifier_registered == false) {
+ mce_register_decode_chain(&amdgpu_bad_page_nb);
+ notifier_registered = true;
+ }
+}
+
+static void amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ if (!notifier_registered && !mce_adev_list.num_gpu)
+ return;
+ for (i = 0, j = 0; i < mce_adev_list.num_gpu; i++) {
+ if (mce_adev_list.devs[i] == adev)
+ mce_adev_list.devs[i] = NULL;
+ if (!mce_adev_list.devs[i])
+ ++j;
+ }
+
+ if (j == mce_adev_list.num_gpu) {
+ mce_adev_list.num_gpu = 0;
+ /* Unregister x86 notifier with MCE subsystem. */
+ if (notifier_registered) {
+ mce_unregister_decode_chain(&amdgpu_bad_page_nb);
+ notifier_registered = false;
+ }
+ }
+}
+#endif
+
+struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)
+{
+ if (!adev)
+ return NULL;
+
+ return adev->psp.ras_context.ras;
+}
+
+int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con)
+{
+ if (!adev)
+ return -EINVAL;
+
+ adev->psp.ras_context.ras = ras_con;
+ return 0;
+}
+
+/* check if ras is supported on a block, e.g. sdma, gfx */
+int amdgpu_ras_is_supported(struct amdgpu_device *adev,
+ unsigned int block)
+{
+ int ret = 0;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (block >= AMDGPU_RAS_BLOCK_COUNT)
+ return 0;
+
+ ret = ras && (adev->ras_enabled & (1 << block));
+
+ /* For the special asic with mem ecc enabled but sram ecc
+ * not enabled, even if the ras block is not marked as supported
+ * in .ras_enabled, the ras block can be considered to support
+ * the ras function as long as the asic supports poison mode and
+ * the ras block has a ras configuration.
+ */
+ if (!ret &&
+ (block == AMDGPU_RAS_BLOCK__GFX ||
+ block == AMDGPU_RAS_BLOCK__SDMA ||
+ block == AMDGPU_RAS_BLOCK__VCN ||
+ block == AMDGPU_RAS_BLOCK__JPEG) &&
+ (amdgpu_ras_mask & (1 << block)) &&
+ amdgpu_ras_is_poison_mode_supported(adev) &&
+ amdgpu_ras_get_ras_block(adev, block, 0))
+ ret = 1;
+
+ return ret;
+}
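+
+/*
+ * Caller sketch (illustrative, not a call site from this file): an IP
+ * block can gate its RAS-specific setup on this check, e.g.
+ *
+ *	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
+ *		return 0;
+ *
+ * Note that the poison-mode special case above only widens the result
+ * for the GFX, SDMA, VCN and JPEG blocks that provide a ras block
+ * configuration.
+ */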
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ /* mode1 reset is the only option for RMA status */
+ if (amdgpu_ras_is_rma(adev)) {
+ ras->gpu_reset_flags = 0;
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ }
+
+ if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ int hive_ras_recovery = 0;
+
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+ /* In the case of multiple GPUs, after one GPU has started
+ * resetting all GPUs on the hive, the other GPUs do not need to
+ * trigger a GPU reset again.
+ */
+ if (!hive_ras_recovery)
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ else
+ atomic_set(&ras->in_recovery, 0);
+ } else {
+ flush_work(&ras->recovery_work);
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ }
+
+ return 0;
+}
+
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (con) {
+ ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+ if (!ret)
+ con->is_aca_debug_mode = enable;
+ }
+
+ return ret;
+}
+
+int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ if (con) {
+ if (amdgpu_aca_is_enabled(adev))
+ ret = amdgpu_aca_smu_set_debug_mode(adev, enable);
+ else
+ ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+ if (!ret)
+ con->is_aca_debug_mode = enable;
+ }
+
+ return ret;
+}
+
+bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+
+ if (!con)
+ return false;
+
+ if ((amdgpu_aca_is_enabled(adev) && smu_funcs && smu_funcs->set_debug_mode) ||
+ (!amdgpu_aca_is_enabled(adev) && mca_funcs && mca_funcs->mca_set_debug_mode))
+ return con->is_aca_debug_mode;
+ else
+ return true;
+}
+
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+ unsigned int *error_query_mode)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
+ const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs;
+
+ if (!con) {
+ *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY;
+ return false;
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ *error_query_mode = AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY;
+ } else if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) {
+ *error_query_mode =
+ (con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
+ } else {
+ *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
+ }
+
+ return true;
+}
+
+/* Register each ip ras block into amdgpu ras */
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+ struct amdgpu_ras_block_object *ras_block_obj)
+{
+ struct amdgpu_ras_block_list *ras_node;
+
+ if (!adev || !ras_block_obj)
+ return -EINVAL;
+
+ ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL);
+ if (!ras_node)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&ras_node->node);
+ ras_node->ras_obj = ras_block_obj;
+ list_add_tail(&ras_node->node, &adev->ras_list);
+
+ return 0;
+}
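+
+/*
+ * Registration sketch (illustrative; my_block_ras and my_block_hw_ops
+ * are placeholder names, not symbols from this patch). An IP block
+ * fills an amdgpu_ras_block_object and hands it to the helper above so
+ * that amdgpu_ras_late_init() can find it on adev->ras_list:
+ *
+ *	static struct amdgpu_ras_block_object my_block_ras = {
+ *		.ras_comm = {
+ *			.name = "my_block",
+ *			.block = AMDGPU_RAS_BLOCK__SDMA,
+ *			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ *		},
+ *		.hw_ops = &my_block_hw_ops,
+ *	};
+ *
+ *	amdgpu_ras_register_ras_block(adev, &my_block_ras);
+ */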
+
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name)
+{
+ if (!err_type_name)
+ return;
+
+ switch (err_type) {
+ case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
+ sprintf(err_type_name, "correctable");
+ break;
+ case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
+ sprintf(err_type_name, "uncorrectable");
+ break;
+ default:
+ sprintf(err_type_name, "unknown");
+ break;
+ }
+}
+
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id)
+{
+ uint32_t err_status_lo_data, err_status_lo_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_lo, reg_entry->reg_lo);
+ err_status_lo_data = RREG32(err_status_lo_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) &&
+ !REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG))
+ return false;
+
+ *memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID);
+
+ return true;
+}
+
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt)
+{
+ uint32_t err_status_hi_data, err_status_hi_offset;
+
+ if (!reg_entry)
+ return false;
+
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance,
+ reg_entry->seg_hi, reg_entry->reg_hi);
+ err_status_hi_data = RREG32(err_status_hi_offset);
+
+ if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
+ !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
+ /* keep the check here in case we need to refer to the result later */
+ dev_dbg(adev->dev, "Invalid err_info field\n");
+
+ /* read err count */
+ *err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
+
+ return true;
+}
+
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count)
+{
+ uint32_t memory_id;
+ unsigned long err_cnt;
+ char err_type_name[16];
+ uint32_t i, j;
+
+ for (i = 0; i < reg_list_size; i++) {
+ /* query memory_id from err_status_lo */
+ if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
+ instance, &memory_id))
+ continue;
+
+ /* query err_cnt from err_status_hi */
+ if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
+ instance, &err_cnt) ||
+ !err_cnt)
+ continue;
+
+ *err_count += err_cnt;
+
+ /* log the errors */
+ amdgpu_ras_get_error_type_name(err_type, err_type_name);
+ if (!mem_list) {
+ /* memory_list is not supported */
+ dev_info(adev->dev,
+ "%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, memory_id);
+ } else {
+ for (j = 0; j < mem_list_size; j++) {
+ if (memory_id == mem_list[j].memory_id) {
+ dev_info(adev->dev,
+ "%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n",
+ err_cnt, err_type_name,
+ reg_list[i].block_name,
+ instance, mem_list[j].name);
+ break;
+ }
+ }
+ }
+ }
+}
+
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance)
+{
+ uint32_t err_status_lo_offset, err_status_hi_offset;
+ uint32_t i;
+
+ for (i = 0; i < reg_list_size; i++) {
+ err_status_lo_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_lo, reg_list[i].reg_lo);
+ err_status_hi_offset =
+ AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance,
+ reg_list[i].seg_hi, reg_list[i].reg_hi);
+ WREG32(err_status_lo_offset, 0);
+ WREG32(err_status_hi_offset, 0);
+ }
+}
+
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
+{
+ memset(err_data, 0, sizeof(*err_data));
+
+ INIT_LIST_HEAD(&err_data->err_node_list);
+
+ return 0;
+}
+
+static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
+{
+ if (!err_node)
+ return;
+
+ list_del(&err_node->node);
+ kvfree(err_node);
+}
+
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
+{
+ struct ras_err_node *err_node, *tmp;
+
+ list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
+ amdgpu_ras_error_node_release(err_node);
+}
+
+static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info)
+{
+ struct ras_err_node *err_node;
+ struct amdgpu_smuio_mcm_config_info *ref_id;
+
+ if (!err_data || !mcm_info)
+ return NULL;
+
+ for_each_ras_error(err_node, err_data) {
+ ref_id = &err_node->err_info.mcm_info;
+
+ if (mcm_info->socket_id == ref_id->socket_id &&
+ mcm_info->die_id == ref_id->die_id)
+ return err_node;
+ }
+
+ return NULL;
+}
+
+static struct ras_err_node *amdgpu_ras_error_node_new(void)
+{
+ struct ras_err_node *err_node;
+
+ err_node = kvzalloc(sizeof(*err_node), GFP_KERNEL);
+ if (!err_node)
+ return NULL;
+
+ INIT_LIST_HEAD(&err_node->node);
+
+ return err_node;
+}
+
+static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b)
+{
+ struct ras_err_node *nodea = container_of(a, struct ras_err_node, node);
+ struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node);
+ struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info;
+ struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info;
+
+ if (unlikely(infoa->socket_id != infob->socket_id))
+ return infoa->socket_id - infob->socket_id;
+ else
+ return infoa->die_id - infob->die_id;
+
+ return 0;
+}
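+
+/*
+ * list_sort() expects a <0 / 0 / >0 comparator, so the error node list
+ * ends up ordered by socket_id first and die_id second; for example,
+ * (socket 0, die 1) sorts ahead of (socket 1, die 0).
+ */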
+
+static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info)
+{
+ struct ras_err_node *err_node;
+
+ err_node = amdgpu_ras_error_find_node_by_id(err_data, mcm_info);
+ if (err_node)
+ return &err_node->err_info;
+
+ err_node = amdgpu_ras_error_node_new();
+ if (!err_node)
+ return NULL;
+
+ memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));
+
+ err_data->err_list_count++;
+ list_add_tail(&err_node->node, &err_data->err_node_list);
+ list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
+
+ return &err_node->err_info;
+}
+
+int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count)
+{
+ struct ras_err_info *err_info;
+
+ if (!err_data || !mcm_info)
+ return -EINVAL;
+
+ if (!count)
+ return 0;
+
+ err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+ if (!err_info)
+ return -EINVAL;
+
+ err_info->ue_count += count;
+ err_data->ue_count += count;
+
+ return 0;
+}
+
+int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count)
+{
+ struct ras_err_info *err_info;
+
+ if (!err_data || !mcm_info)
+ return -EINVAL;
+
+ if (!count)
+ return 0;
+
+ err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+ if (!err_info)
+ return -EINVAL;
+
+ err_info->ce_count += count;
+ err_data->ce_count += count;
+
+ return 0;
+}
+
+int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count)
+{
+ struct ras_err_info *err_info;
+
+ if (!err_data || !mcm_info)
+ return -EINVAL;
+
+ if (!count)
+ return 0;
+
+ err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
+ if (!err_info)
+ return -EINVAL;
+
+ err_info->de_count += count;
+ err_data->de_count += count;
+
+ return 0;
+}
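+
+/*
+ * Usage sketch for the per-MCM statistic helpers above (all values are
+ * assumptions for illustration):
+ *
+ *	struct ras_err_data err_data;
+ *	struct amdgpu_smuio_mcm_config_info mcm = {
+ *		.socket_id = 0,
+ *		.die_id = 1,
+ *	};
+ *
+ *	amdgpu_ras_error_data_init(&err_data);
+ *	amdgpu_ras_error_statistic_ue_count(&err_data, &mcm, 1);
+ *	amdgpu_ras_error_statistic_ce_count(&err_data, &mcm, 2);
+ *	// err_data.ue_count == 1, err_data.ce_count == 2, and a single
+ *	// ras_err_node now carries the (socket 0, die 1) breakdown
+ *	amdgpu_ras_error_data_fini(&err_data);
+ */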
+
+#define mmMP0_SMN_C2PMSG_92 0x1609C
+#define mmMP0_SMN_C2PMSG_126 0x160BE
+static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
+ u32 instance)
+{
+ u32 socket_id, aid_id, hbm_id;
+ u32 fw_status;
+ u32 boot_error;
+ u64 reg_addr;
+
+ /* The pattern for smn addressing in other SOCs could be different from
+ * the one for aqua_vanjaram. We should revisit the code if the pattern
+ * changes. In that case, replace the aqua_vanjaram implementation
+ * with a more common helper */
+ reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+ aqua_vanjaram_encode_ext_smn_addressing(instance);
+ fw_status = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+
+ reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
+ aqua_vanjaram_encode_ext_smn_addressing(instance);
+ boot_error = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+
+ socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
+ aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
+ hbm_id = ((1 == AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error)) ? 0 : 1);
+
+ if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, memory training failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, firmware load failed at boot time\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, wafl link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, xgmi link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, usr cp link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, usr dp link training failed\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm memory test failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n",
+ socket_id, aid_id, hbm_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n",
+ socket_id, aid_id, fw_status);
+
+ if (AMDGPU_RAS_GPU_ERR_GENERIC(boot_error))
+ dev_info(adev->dev,
+ "socket: %d, aid: %d, fw_status: 0x%x, Boot Controller Generic Error\n",
+ socket_id, aid_id, fw_status);
+}
+
+static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
+ u32 instance)
+{
+ u64 reg_addr;
+ u32 reg_data;
+ int retry_loop;
+
+ reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
+ aqua_vanjaram_encode_ext_smn_addressing(instance);
+
+ for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
+ reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
+ if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS)
+ return false;
+ else
+ msleep(1);
+ }
+
+ return true;
+}
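+
+/*
+ * With AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT set to 100 and one msleep(1)
+ * per retry, the wait above is bounded at roughly 100 ms per instance
+ * (msleep() may round each sleep up) before a boot error is assumed.
+ */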
+
+void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances)
+{
+ u32 i;
+
+ for (i = 0; i < num_instances; i++) {
+ if (amdgpu_ras_boot_error_detected(adev, i))
+ amdgpu_ras_boot_time_error_reporting(adev, i);
+ }
+}
+
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT;
+ int ret = 0;
+
+ if (amdgpu_ras_check_critical_address(adev, start))
+ return 0;
+
+ mutex_lock(&con->page_rsv_lock);
+ ret = amdgpu_vram_mgr_query_page_status(mgr, start);
+ if (ret == -ENOENT)
+ ret = amdgpu_vram_mgr_reserve_range(mgr, start, AMDGPU_GPU_PAGE_SIZE);
+ mutex_unlock(&con->page_rsv_lock);
+
+ return ret;
+}
+
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ if (RAS_EVENT_ID_IS_VALID(event_id))
+ dev_printk(KERN_INFO, adev->dev, "{%llu}%pV", event_id, &vaf);
+ else
+ dev_printk(KERN_INFO, adev->dev, "%pV", &vaf);
+
+ va_end(args);
+}
+
+bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_is_rma(adev);
+
+ if (!con)
+ return false;
+
+ return con->is_rma;
+}
+
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_mgr_resource *vres;
+ struct ras_critical_region *region;
+ struct drm_buddy_block *block;
+ int ret = 0;
+
+ if (!bo || !bo->tbo.resource)
+ return -EINVAL;
+
+ vres = to_amdgpu_vram_mgr_resource(bo->tbo.resource);
+
+ mutex_lock(&con->critical_region_lock);
+
+ /* Check if the bo has already been recorded */
+ list_for_each_entry(region, &con->critical_region_head, node)
+ if (region->bo == bo)
+ goto out;
+
+ /* Record a new critical amdgpu bo */
+ list_for_each_entry(block, &vres->blocks, link) {
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ region->bo = bo;
+ region->start = amdgpu_vram_mgr_block_start(block);
+ region->size = amdgpu_vram_mgr_block_size(block);
+ list_add_tail(&region->node, &con->critical_region_head);
+ }
+
+out:
+ mutex_unlock(&con->critical_region_lock);
+
+ return ret;
+}
+
+static void amdgpu_ras_critical_region_init(struct amdgpu_device *adev)
+{
+ amdgpu_ras_add_critical_region(adev, adev->mman.fw_reserved_memory);
+}
+
+static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_critical_region *region, *tmp;
+
+ mutex_lock(&con->critical_region_lock);
+ list_for_each_entry_safe(region, tmp, &con->critical_region_head, node) {
+ list_del(&region->node);
+ kfree(region);
+ }
+ mutex_unlock(&con->critical_region_lock);
+}
+
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_critical_region *region;
+ bool ret = false;
+
+ mutex_lock(&con->critical_region_lock);
+ list_for_each_entry(region, &con->critical_region_head, node) {
+ if ((region->start <= addr) &&
+ (addr < (region->start + region->size))) {
+ ret = true;
+ break;
+ }
+ }
+ mutex_unlock(&con->critical_region_lock);
+
+ return ret;
+}
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (amdgpu_uniras_enabled(tmp_adev))
+ amdgpu_ras_mgr_pre_reset(tmp_adev);
+ }
+}
+
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (amdgpu_uniras_enabled(tmp_adev))
+ amdgpu_ras_mgr_post_reset(tmp_adev);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
new file mode 100644
index 000000000000..ff44190d7d98
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -0,0 +1,1047 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef _AMDGPU_RAS_H
+#define _AMDGPU_RAS_H
+
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/kfifo.h>
+#include <linux/radix-tree.h>
+#include "ta_ras_if.h"
+#include "amdgpu_ras_eeprom.h"
+#include "amdgpu_smuio.h"
+#include "amdgpu_aca.h"
+
+struct amdgpu_iv_entry;
+
+#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0)
+#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1)
+#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2)
+#define AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 3, 3)
+#define AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 4, 4)
+#define AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 5, 5)
+#define AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(x) AMDGPU_GET_REG_FIELD(x, 6, 6)
+#define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7)
+#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8)
+#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11)
+#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13)
+#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29)
+#define AMDGPU_RAS_GPU_ERR_GENERIC(x) AMDGPU_GET_REG_FIELD(x, 30, 30)
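+
+/* Worked example for the fields above, assuming AMDGPU_GET_REG_FIELD(x,
+ * high, low) extracts bits [high:low]: boot_error = 0x201 has bit 0 set
+ * and bits [10:8] = 0x2, so AMDGPU_RAS_GPU_ERR_MEM_TRAINING() is non-zero
+ * and AMDGPU_RAS_GPU_ERR_SOCKET_ID() returns 2, i.e. memory training
+ * failed on socket 2 (the AID and HBM id fields are both 0 here).
+ */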
+
+#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100
+#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
+#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF
+
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
+/* position of the instance value in sub_block_index of
+ * ta_ras_trigger_error_input; the sub block uses the lower 12 bits
+ */
+#define AMDGPU_RAS_INST_MASK 0xfffff000
+#define AMDGPU_RAS_INST_SHIFT 0xc
+
+#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
+#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
+
+/* Reserve 8 physical dram rows for possible retirement.
+ * In the worst case, this loses 8 * 2MB of memory in the vram domain */
+#define AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20)
+/* The high three bits indicate the socket id */
+#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
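+
+/* Example: for socket_id = 3 the init path stores 3 << 29 = 0x60000000
+ * in con->features, AMDGPU_RAS_GET_FEATURES() strips it back out, and
+ * (features & AMDGPU_RAS_FEATURES_SOCKETID_MASK) >>
+ * AMDGPU_RAS_FEATURES_SOCKETID_SHIFT recovers the socket id.
+ */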
+
+#define RAS_EVENT_INVALID_ID (BIT_ULL(63))
+#define RAS_EVENT_ID_IS_VALID(x) (!((x) & BIT_ULL(63)))
+
+#define RAS_EVENT_LOG(adev, id, fmt, ...) \
+ amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__)
+
+#define amdgpu_ras_mark_ras_event(adev, type) \
+ (amdgpu_ras_mark_ras_event_caller((adev), (type), __builtin_return_address(0)))
+
+enum amdgpu_ras_block {
+ AMDGPU_RAS_BLOCK__UMC = 0,
+ AMDGPU_RAS_BLOCK__SDMA,
+ AMDGPU_RAS_BLOCK__GFX,
+ AMDGPU_RAS_BLOCK__MMHUB,
+ AMDGPU_RAS_BLOCK__ATHUB,
+ AMDGPU_RAS_BLOCK__PCIE_BIF,
+ AMDGPU_RAS_BLOCK__HDP,
+ AMDGPU_RAS_BLOCK__XGMI_WAFL,
+ AMDGPU_RAS_BLOCK__DF,
+ AMDGPU_RAS_BLOCK__SMN,
+ AMDGPU_RAS_BLOCK__SEM,
+ AMDGPU_RAS_BLOCK__MP0,
+ AMDGPU_RAS_BLOCK__MP1,
+ AMDGPU_RAS_BLOCK__FUSE,
+ AMDGPU_RAS_BLOCK__MCA,
+ AMDGPU_RAS_BLOCK__VCN,
+ AMDGPU_RAS_BLOCK__JPEG,
+ AMDGPU_RAS_BLOCK__IH,
+ AMDGPU_RAS_BLOCK__MPIO,
+ AMDGPU_RAS_BLOCK__MMSCH,
+
+ AMDGPU_RAS_BLOCK__LAST,
+ AMDGPU_RAS_BLOCK__ANY = -1
+};
+
+enum amdgpu_ras_mca_block {
+ AMDGPU_RAS_MCA_BLOCK__MP0 = 0,
+ AMDGPU_RAS_MCA_BLOCK__MP1,
+ AMDGPU_RAS_MCA_BLOCK__MPIO,
+ AMDGPU_RAS_MCA_BLOCK__IOHC,
+
+ AMDGPU_RAS_MCA_BLOCK__LAST
+};
+
+#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
+#define AMDGPU_RAS_MCA_BLOCK_COUNT AMDGPU_RAS_MCA_BLOCK__LAST
+#define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
+
+enum amdgpu_ras_gfx_subblock {
+ /* CPC */
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
+ AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
+ AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
+ AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
+ /* CPF */
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
+ AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+ /* CPG */
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+ AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+ /* GDS */
+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
+ /* SPI */
+ AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
+ /* SQ */
+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
+ AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
+ AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+ AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
+ /* SQC (3 ranges) */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+ /* SQC range 0 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
+ /* SQC range 1 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
+ /* SQC range 2 */
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
+ /* TA */
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
+ /* TCA */
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
+ /* TCC (5 sub-ranges) */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+ /* TCC range 0 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
+ AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
+ /* TCC range 1 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
+ /* TCC range 2 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
+ AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
+ /* TCC range 3 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
+ /* TCC range 4 */
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
+ /* TCI */
+ AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
+ /* TCP */
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
+ /* TD */
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
+ AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+ AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
+ /* EA (3 sub-ranges) */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+ /* EA range 0 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
+ /* EA range 1 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
+ /* EA range 2 */
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
+ AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
+ /* UTC VM L2 bank */
+ AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
+ /* UTC VM walker */
+ AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
+ /* UTC ATC L2 2MB cache */
+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
+ /* UTC ATC L2 4KB cache */
+ AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
+ AMDGPU_RAS_BLOCK__GFX_MAX
+};
+
+enum amdgpu_ras_error_type {
+ AMDGPU_RAS_ERROR__NONE = 0,
+ AMDGPU_RAS_ERROR__PARITY = 1,
+ AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2,
+ AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
+ AMDGPU_RAS_ERROR__POISON = 8,
+};
+
+enum amdgpu_ras_ret {
+ AMDGPU_RAS_SUCCESS = 0,
+ AMDGPU_RAS_FAIL,
+ AMDGPU_RAS_UE,
+ AMDGPU_RAS_CE,
+ AMDGPU_RAS_PT,
+};
+
+enum amdgpu_ras_error_query_mode {
+ AMDGPU_RAS_INVALID_ERROR_QUERY = 0,
+ AMDGPU_RAS_DIRECT_ERROR_QUERY = 1,
+ AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2,
+ AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY = 3,
+};
+
+/* ras error status register fields */
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0
+#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L
+#define ERR_STATUS_LO__MEMORY_ID__SHIFT 0x18
+#define ERR_STATUS_LO__MEMORY_ID_MASK 0xFF000000L
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG__SHIFT 0x2
+#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG_MASK 0x00000004L
+#define ERR_STATUS__ERR_CNT__SHIFT 0x17
+#define ERR_STATUS__ERR_CNT_MASK 0x03800000L
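+
+/* Decoding example (register values are illustrative): an ERR_STATUS_LO
+ * value of 0x2A000001 has the status-valid flag set and MEMORY_ID = 0x2A;
+ * an ERR_STATUS_HI value of 0x01800004 has the info-valid flag set and
+ * ERR_CNT = 3 (bits [25:23] = 0b011).
+ */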
+
+#define AMDGPU_RAS_REG_ENTRY(ip, inst, reg_lo, reg_hi) \
+ ip##_HWIP, inst, reg_lo##_BASE_IDX, reg_lo, reg_hi##_BASE_IDX, reg_hi
+
+#define AMDGPU_RAS_REG_ENTRY_OFFSET(hwip, ip_inst, segment, reg) \
+ (adev->reg_offset[hwip][ip_inst][segment] + (reg))
+
+#define AMDGPU_RAS_ERR_INFO_VALID (1 << 0)
+#define AMDGPU_RAS_ERR_STATUS_VALID (1 << 1)
+#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
+
+#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
+#define AMDGPU_RAS_GPU_RESET_MODE1_RESET (0x1 << 1)
+
+struct amdgpu_ras_err_status_reg_entry {
+ uint32_t hwip;
+ uint32_t ip_inst;
+ uint32_t seg_lo;
+ uint32_t reg_lo;
+ uint32_t seg_hi;
+ uint32_t reg_hi;
+ uint32_t reg_inst;
+ uint32_t flags;
+ const char *block_name;
+};
+
+struct amdgpu_ras_memory_id_entry {
+ uint32_t memory_id;
+ const char *name;
+};
+
+struct ras_common_if {
+ enum amdgpu_ras_block block;
+ enum amdgpu_ras_error_type type;
+ uint32_t sub_block_index;
+ char name[32];
+};
+
+#define MAX_UMC_CHANNEL_NUM 32
+
+struct ecc_info_per_ch {
+ uint16_t ce_count_lo_chip;
+ uint16_t ce_count_hi_chip;
+ uint64_t mca_umc_status;
+ uint64_t mca_umc_addr;
+ uint64_t mca_ceumc_addr;
+};
+
+struct umc_ecc_info {
+ struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];
+
+ /* Indicates whether the smu ecctable supports
+ * recording the correctable error address
+ */
+ int record_ce_addr_supported;
+};
+
+enum ras_event_type {
+ RAS_EVENT_TYPE_INVALID = 0,
+ RAS_EVENT_TYPE_FATAL,
+ RAS_EVENT_TYPE_POISON_CREATION,
+ RAS_EVENT_TYPE_POISON_CONSUMPTION,
+ RAS_EVENT_TYPE_COUNT,
+};
+
+struct ras_event_state {
+ u64 last_seqno;
+ atomic64_t count;
+};
+
+struct ras_event_manager {
+ atomic64_t seqno;
+ struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT];
+};
+
+struct ras_event_id {
+ enum ras_event_type type;
+ u64 event_id;
+};
+
+struct ras_query_context {
+ struct ras_event_id evid;
+};
+
+typedef int (*pasid_notify)(struct amdgpu_device *adev,
+ uint16_t pasid, void *data);
+
+struct ras_poison_msg {
+ enum amdgpu_ras_block block;
+ uint16_t pasid;
+ uint32_t reset;
+ pasid_notify pasid_fn;
+ void *data;
+};
+
+struct ras_err_pages {
+ uint32_t count;
+ uint64_t *pfn;
+};
+
+struct ras_ecc_err {
+ uint64_t status;
+ uint64_t ipid;
+ uint64_t addr;
+ uint64_t pa_pfn;
+ /* save global channel index across all UMC instances */
+ uint32_t channel_idx;
+ struct ras_err_pages err_pages;
+};
+
+struct ras_ecc_log_info {
+ struct mutex lock;
+ struct radix_tree_root de_page_tree;
+ uint64_t de_queried_count;
+ uint64_t consumption_q_count;
+};
+
+struct ras_critical_region {
+ struct list_head node;
+ struct amdgpu_bo *bo;
+ uint64_t start;
+ uint64_t size;
+};
+
+struct ras_eeprom_table_version {
+ uint32_t minor : 16;
+ uint32_t major : 16;
+};
+
+struct ras_eeprom_smu_funcs {
+ int (*get_ras_table_version)(struct amdgpu_device *adev,
+ uint32_t *table_version);
+ int (*get_badpage_count)(struct amdgpu_device *adev, uint32_t *count, uint32_t timeout);
+ int (*get_badpage_mca_addr)(struct amdgpu_device *adev, uint16_t index, uint64_t *mca_addr);
+ int (*set_timestamp)(struct amdgpu_device *adev, uint64_t timestamp);
+ int (*get_timestamp)(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+ int (*get_badpage_ipid)(struct amdgpu_device *adev, uint16_t index, uint64_t *ipid);
+ int (*erase_ras_table)(struct amdgpu_device *adev, uint32_t *result);
+};
+
+enum ras_smu_feature_flags {
+ RAS_SMU_FEATURE_BIT__RAS_EEPROM = BIT_ULL(0),
+};
+
+struct ras_smu_drv {
+ const struct ras_eeprom_smu_funcs *smu_eeprom_funcs;
+ void (*ras_smu_feature_flags)(struct amdgpu_device *adev, uint64_t *flags);
+};
+
+struct amdgpu_ras {
+ void *ras_mgr;
+ /* ras infrastructure */
+ /* for ras itself. */
+ uint32_t features;
+ uint32_t schema;
+ struct list_head head;
+ /* sysfs */
+ struct device_attribute features_attr;
+ struct device_attribute version_attr;
+ struct device_attribute schema_attr;
+ struct device_attribute event_state_attr;
+ struct bin_attribute badpages_attr;
+ struct dentry *de_ras_eeprom_table;
+ /* block array */
+ struct ras_manager *objs;
+
+ /* gpu recovery */
+ struct work_struct recovery_work;
+ atomic_t in_recovery;
+ atomic_t rma_in_recovery;
+ struct amdgpu_device *adev;
+ /* error handler data */
+ struct ras_err_handler_data *eh_data;
+ struct mutex recovery_lock;
+
+ uint32_t flags;
+ bool reboot;
+ struct amdgpu_ras_eeprom_control eeprom_control;
+
+ bool error_query_ready;
+
+ /* bad page count threshold */
+ uint32_t bad_page_cnt_threshold;
+
+ /* disable ras error count harvest in recovery */
+ bool disable_ras_err_cnt_harvest;
+
+ /* is poison mode supported */
+ bool poison_supported;
+
+ /* RAS count errors delayed work */
+ struct delayed_work ras_counte_delay_work;
+ atomic_t ras_ue_count;
+ atomic_t ras_ce_count;
+
+ /* record umc error info queried from smu */
+ struct umc_ecc_info umc_ecc;
+
+ /* Indicates whether the smu needs to update bad channel info */
+ bool update_channel_flag;
+ /* Record status of smu mca debug mode */
+ bool is_aca_debug_mode;
+ bool is_rma;
+
+ /* Record special requirements of gpu reset caller */
+ uint32_t gpu_reset_flags;
+
+ struct task_struct *page_retirement_thread;
+ wait_queue_head_t page_retirement_wq;
+ struct mutex page_retirement_lock;
+ atomic_t page_retirement_req_cnt;
+ atomic_t poison_creation_count;
+ atomic_t poison_consumption_count;
+ struct mutex page_rsv_lock;
+ DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128);
+ struct ras_ecc_log_info umc_ecc_log;
+ struct delayed_work page_retirement_dwork;
+
+ /* ras errors detected */
+ unsigned long ras_err_state;
+
+ /* RAS event manager */
+ struct ras_event_manager __event_mgr;
+ struct ras_event_manager *event_mgr;
+
+ uint64_t reserved_pages_in_bytes;
+
+ pid_t init_task_pid;
+ char init_task_comm[TASK_COMM_LEN];
+
+ int bad_page_num;
+
+ struct list_head critical_region_head;
+ struct mutex critical_region_lock;
+
+ /* Protect poison injection */
+ struct mutex poison_lock;
+
+ /* Disable/Enable uniras switch */
+ bool uniras_enabled;
+ const struct ras_smu_drv *ras_smu_drv;
+};
+
+struct ras_fs_data {
+ char sysfs_name[48];
+ char debugfs_name[32];
+};
+
+struct ras_err_info {
+ struct amdgpu_smuio_mcm_config_info mcm_info;
+ u64 ce_count;
+ u64 ue_count;
+ u64 de_count;
+};
+
+struct ras_err_node {
+ struct list_head node;
+ struct ras_err_info err_info;
+};
+
+struct ras_err_data {
+ unsigned long ue_count;
+ unsigned long ce_count;
+ unsigned long de_count;
+ unsigned long err_addr_cnt;
+ struct eeprom_table_record *err_addr;
+ unsigned long err_addr_len;
+ u32 err_list_count;
+ struct list_head err_node_list;
+};
+
+#define for_each_ras_error(err_node, err_data) \
+ list_for_each_entry(err_node, &(err_data)->err_node_list, node)
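+
+/* Iteration sketch (illustrative): walking the per-MCM breakdown built by
+ * the statistic helpers in amdgpu_ras.c:
+ *
+ *	struct ras_err_node *err_node;
+ *
+ *	for_each_ras_error(err_node, err_data) {
+ *		struct ras_err_info *info = &err_node->err_info;
+ *		// info->mcm_info gives the socket/die; info->ce_count,
+ *		// info->ue_count and info->de_count hold the totals
+ *	}
+ */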
+
+struct ras_err_handler_data {
+ /* point to bad page records array */
+ struct eeprom_table_record *bps;
+ /* the count of entries */
+ int count;
+ int count_saved;
+ /* the space can place new entries */
+ int space_left;
+};
+
+typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+
+struct ras_ih_data {
+ /* interrupt bottom half */
+ struct work_struct ih_work;
+ int inuse;
+ /* IP callback */
+ ras_ih_cb cb;
+ /* full of entries */
+ unsigned char *ring;
+ unsigned int ring_size;
+ unsigned int element_size;
+ unsigned int aligned_element_size;
+ unsigned int rptr;
+ unsigned int wptr;
+};
+
+struct ras_manager {
+ struct ras_common_if head;
+ /* reference count */
+ int use;
+ /* ras block link */
+ struct list_head node;
+ /* the device */
+ struct amdgpu_device *adev;
+ /* sysfs */
+ struct device_attribute sysfs_attr;
+ int attr_inuse;
+
+ /* fs node name */
+ struct ras_fs_data fs_data;
+
+ /* IH data */
+ struct ras_ih_data ih_data;
+
+ struct ras_err_data err_data;
+
+ struct aca_handle aca_handle;
+};
+
+struct ras_badpage {
+ unsigned int bp;
+ unsigned int size;
+ unsigned int flags;
+};
+
+/* interfaces for IP */
+struct ras_fs_if {
+ struct ras_common_if head;
+ const char *sysfs_name;
+ char debugfs_name[32];
+};
+
+struct ras_query_if {
+ struct ras_common_if head;
+ unsigned long ue_count;
+ unsigned long ce_count;
+ unsigned long de_count;
+};
+
+struct ras_inject_if {
+ struct ras_common_if head;
+ uint64_t address;
+ uint64_t value;
+ uint32_t instance_mask;
+};
+
+struct ras_cure_if {
+ struct ras_common_if head;
+ uint64_t address;
+};
+
+struct ras_ih_if {
+ struct ras_common_if head;
+ ras_ih_cb cb;
+};
+
+struct ras_dispatch_if {
+ struct ras_common_if head;
+ struct amdgpu_iv_entry *entry;
+};
+
+struct ras_debug_if {
+ union {
+ struct ras_common_if head;
+ struct ras_inject_if inject;
+ };
+ int op;
+};
+
+struct amdgpu_ras_block_object {
+ struct ras_common_if ras_comm;
+
+ int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj,
+ enum amdgpu_ras_block block, uint32_t sub_block_index);
+ int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+ ras_ih_cb ras_cb;
+ const struct amdgpu_ras_block_hw_ops *hw_ops;
+};
+
+struct amdgpu_ras_block_hw_ops {
+ int (*ras_error_inject)(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask);
+ void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status);
+ void (*query_ras_error_status)(struct amdgpu_device *adev);
+ void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
+ void (*reset_ras_error_count)(struct amdgpu_device *adev);
+ void (*reset_ras_error_status)(struct amdgpu_device *adev);
+ bool (*query_poison_status)(struct amdgpu_device *adev);
+ bool (*handle_poison_consumption)(struct amdgpu_device *adev);
+};
+
+/* work flow
+ * vbios
+ * 1: ras feature enable (enabled by default)
+ * psp
+ * 2: ras framework init (in ip_init)
+ * IP
+ * 3: IH add
+ * 4: debugfs/sysfs create
+ * 5: query/inject
+ * 6: debugfs/sysfs remove
+ * 7: IH remove
+ * 8: feature disable
+ */
+
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev);
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info);
+
+void amdgpu_ras_resume(struct amdgpu_device *adev);
+void amdgpu_ras_suspend(struct amdgpu_device *adev);
+
+int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
+ unsigned long *ce_count,
+ unsigned long *ue_count,
+ struct ras_query_if *query_info);
+
+/* error handling functions */
+int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages, bool from_rom);
+
+int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
+ unsigned long *new_cnt);
+
+static inline enum ta_ras_block
+amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
+ switch (block) {
+ case AMDGPU_RAS_BLOCK__UMC:
+ return TA_RAS_BLOCK__UMC;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ return TA_RAS_BLOCK__SDMA;
+ case AMDGPU_RAS_BLOCK__GFX:
+ return TA_RAS_BLOCK__GFX;
+ case AMDGPU_RAS_BLOCK__MMHUB:
+ return TA_RAS_BLOCK__MMHUB;
+ case AMDGPU_RAS_BLOCK__ATHUB:
+ return TA_RAS_BLOCK__ATHUB;
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ return TA_RAS_BLOCK__PCIE_BIF;
+ case AMDGPU_RAS_BLOCK__HDP:
+ return TA_RAS_BLOCK__HDP;
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ return TA_RAS_BLOCK__XGMI_WAFL;
+ case AMDGPU_RAS_BLOCK__DF:
+ return TA_RAS_BLOCK__DF;
+ case AMDGPU_RAS_BLOCK__SMN:
+ return TA_RAS_BLOCK__SMN;
+ case AMDGPU_RAS_BLOCK__SEM:
+ return TA_RAS_BLOCK__SEM;
+ case AMDGPU_RAS_BLOCK__MP0:
+ return TA_RAS_BLOCK__MP0;
+ case AMDGPU_RAS_BLOCK__MP1:
+ return TA_RAS_BLOCK__MP1;
+ case AMDGPU_RAS_BLOCK__FUSE:
+ return TA_RAS_BLOCK__FUSE;
+ case AMDGPU_RAS_BLOCK__MCA:
+ return TA_RAS_BLOCK__MCA;
+ case AMDGPU_RAS_BLOCK__VCN:
+ return TA_RAS_BLOCK__VCN;
+ case AMDGPU_RAS_BLOCK__JPEG:
+ return TA_RAS_BLOCK__JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return TA_RAS_BLOCK__IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return TA_RAS_BLOCK__MPIO;
+ case AMDGPU_RAS_BLOCK__MMSCH:
+ return TA_RAS_BLOCK__MMSCH;
+ default:
+ WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
+ return TA_RAS_BLOCK__UMC;
+ }
+}
+
+static inline enum ta_ras_error_type
+amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
+ switch (error) {
+ case AMDGPU_RAS_ERROR__NONE:
+ return TA_RAS_ERROR__NONE;
+ case AMDGPU_RAS_ERROR__PARITY:
+ return TA_RAS_ERROR__PARITY;
+ case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE:
+ return TA_RAS_ERROR__SINGLE_CORRECTABLE;
+ case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE:
+ return TA_RAS_ERROR__MULTI_UNCORRECTABLE;
+ case AMDGPU_RAS_ERROR__POISON:
+ return TA_RAS_ERROR__POISON;
+ default:
+ WARN_ONCE(1, "RAS ERROR: unexpected error type %d\n", error);
+ return TA_RAS_ERROR__NONE;
+ }
+}
+
+/* called in ip_init and ip_fini */
+int amdgpu_ras_init(struct amdgpu_device *adev);
+int amdgpu_ras_late_init(struct amdgpu_device *adev);
+int amdgpu_ras_fini(struct amdgpu_device *adev);
+int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
+
+int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+
+void amdgpu_ras_block_late_fini(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+
+int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
+ struct ras_common_if *head, bool enable);
+
+int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
+ struct ras_common_if *head, bool enable);
+
+int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
+
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
+ struct ras_query_if *info);
+
+int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+
+int amdgpu_ras_error_inject(struct amdgpu_device *adev,
+ struct ras_inject_if *info);
+
+int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
+ struct ras_dispatch_if *info);
+
+struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+ struct ras_common_if *head);
+
+extern atomic_t amdgpu_ras_in_intr;
+
+static inline bool amdgpu_ras_intr_triggered(void)
+{
+ return !!atomic_read(&amdgpu_ras_in_intr);
+}
+
+static inline void amdgpu_ras_intr_cleared(void)
+{
+ atomic_set(&amdgpu_ras_in_intr, 0);
+}
+
+int amdgpu_ras_global_ras_isr(struct amdgpu_device *adev);
+
+void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready);
+
+bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev);
+
+void amdgpu_release_ras_context(struct amdgpu_device *adev);
+
+int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev);
+
+const char *get_ras_block_str(struct ras_common_if *ras_block);
+
+bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev);
+
+int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block);
+
+int amdgpu_ras_reset_gpu(struct amdgpu_device *adev);
+
+struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev);
+
+int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con);
+
+int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable);
+int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable);
+bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev);
+bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
+ unsigned int *mode);
+
+int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
+ struct amdgpu_ras_block_object *ras_block_obj);
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
+void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name);
+bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ uint32_t *memory_id);
+bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_entry,
+ uint32_t instance,
+ unsigned long *err_cnt);
+void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ const struct amdgpu_ras_memory_id_entry *mem_list,
+ uint32_t mem_list_size,
+ uint32_t instance,
+ uint32_t err_type,
+ unsigned long *err_count);
+void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
+ const struct amdgpu_ras_err_status_reg_entry *reg_list,
+ uint32_t reg_list_size,
+ uint32_t instance);
+
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data);
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data);
+int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
+ struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 count);
+void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances);
+int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
+ const struct aca_info *aca_info, void *data);
+int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk);
+
+ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
+ struct aca_handle *handle, char *buf, void *data);
+
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status);
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev);
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev);
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block);
+
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type);
+int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
+ const void *caller);
+
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn);
+
+int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, struct amdgpu_bo *bo);
+bool amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr);
+
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
+bool amdgpu_ras_in_recovery(struct amdgpu_device *adev);
+
+__printf(3, 4)
+void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
+ const char *fmt, ...);
+
+bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
+
+void amdgpu_ras_pre_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
+void amdgpu_ras_post_reset(struct amdgpu_device *adev,
+ struct list_head *device_list);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
new file mode 100644
index 000000000000..64dd7a81bff5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -0,0 +1,1930 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_ras_eeprom.h"
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include <linux/bits.h>
+#include "atom.h"
+#include "amdgpu_eeprom.h"
+#include "amdgpu_atomfirmware.h"
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include "amdgpu_reset.h"
+#include "amdgpu_ras_mgr.h"
+
+/* These are memory addresses as would be seen by one or more EEPROM
+ * chips strung on the I2C bus, usually by manipulating pins 1-3 of a
+ * set of EEPROM devices. They form a continuous memory space.
+ *
+ * The I2C device address includes the device type identifier, 1010b,
+ * which is a reserved value and indicates that this is an I2C EEPROM
+ * device. It also includes the top 3 bits of the 19 bit EEPROM memory
+ * address, namely bits 18, 17, and 16. This makes up the 7 bit
+ * address sent on the I2C bus with bit 0 being the direction bit,
+ * which is not represented here, and sent by the hardware directly.
+ *
+ * For instance,
+ * 50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0.
+ * 54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h.
+ * 56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h.
+ * Depending on the size of the I2C EEPROM device(s), bits 18:16 may
+ * address memory within a device or select a device on the I2C bus,
+ * according to the status of pins 1-3. See top of amdgpu_eeprom.c.
+ *
+ * The RAS table lives either at address 0 or address 40000h of EEPROM.
+ */
+#define EEPROM_I2C_MADDR_0 0x0
+#define EEPROM_I2C_MADDR_4 0x40000
+
+/*
+ * The 2 macros below represent the actual size in bytes that
+ * those entities occupy in the EEPROM memory.
+ * RAS_TABLE_RECORD_SIZE is different from sizeof(struct eeprom_table_record),
+ * which uses uint64 to store 6-byte fields such as retired_page.
+ */
+#define RAS_TABLE_HEADER_SIZE 20
+#define RAS_TABLE_RECORD_SIZE 24
+
+/* Table hdr is 'AMDR' */
+#define RAS_TABLE_HDR_VAL 0x414d4452
+
+/* Bad GPU tag 'BADG' */
+#define RAS_TABLE_HDR_BAD 0x42414447
+
+/*
+ * EEPROM Table structure v1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
+/* Assume a 2-Mbit EEPROM and use the whole space. */
+#define RAS_TBL_SIZE_BYTES (256 * 1024)
+#define RAS_TABLE_START 0
+#define RAS_HDR_START RAS_TABLE_START
+#define RAS_RECORD_START (RAS_HDR_START + RAS_TABLE_HEADER_SIZE)
+#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
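+/* With a 2-Mbit part this works out to (262144 - 20) / 24 = 10921 records. */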
+
+/*
+ * EEPROM Table structure v2.1
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE HEADER |
+ * | ( size 20 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | EEPROM TABLE RAS INFO |
+ * | (available info size 4 Bytes) |
+ * | ( reserved size 252 Bytes ) |
+ * | |
+ * ---------------------------------
+ * | |
+ * | BAD PAGE RECORD AREA |
+ * | |
+ * ---------------------------------
+ */
+
+/* EEPROM Table V2_1 */
+#define RAS_TABLE_V2_1_INFO_SIZE 256
+#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
+#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
+ RAS_TABLE_V2_1_INFO_SIZE)
+#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) \
+ / RAS_TABLE_RECORD_SIZE)
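+/* For v2.1 this works out to (262144 - 20 - 256) / 24 = 10911 records. */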
+
+#define RAS_SMU_MESSAGE_TIMEOUT_MS 1000 /* 1s */
+
+/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
+ * offset off of RAS_TABLE_START. That is, this is something you can
+ * add to control->i2c_address, and then tell I2C layer to read
+ * from/write to there. _N is the so-called absolute index,
+ * because it starts right after the table header.
+ */
+#define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \
+ (_N) * RAS_TABLE_RECORD_SIZE)
+
+#define RAS_OFFSET_TO_INDEX(_C, _O) (((_O) - \
+ (_C)->ras_record_offset) / RAS_TABLE_RECORD_SIZE)
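+/* For example, with the v1 layout (ras_record_offset == RAS_RECORD_START == 20),
+ * record index 3 maps to offset 20 + 3 * 24 = 92, and offset 92 maps back to
+ * index (92 - 20) / 24 = 3.
+ */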
+
+/* Given a 0-based relative record index, 0, 1, 2, ..., etc., off
+ * of "fri", return the absolute record index off of the end of
+ * the table header.
+ */
+#define RAS_RI_TO_AI(_C, _I) (((_I) + (_C)->ras_fri) % \
+ (_C)->ras_max_record_count)
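+/* E.g. with ras_fri == 10 and ras_max_record_count == 12, relative index 5
+ * wraps to absolute index (5 + 10) % 12 == 3.
+ */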
+
+#define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
+
+#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \
+ RAS_TABLE_HEADER_SIZE - \
+ RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
+
+#define to_amdgpu_device(x) ((container_of(x, struct amdgpu_ras, eeprom_control))->adev)
+
+static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */
+ case IP_VERSION(11, 0, 7): /* Sienna cichlid */
+ case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 2): /* Aldebaran */
+ case IP_VERSION(13, 0, 10):
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ return !adev->gmc.is_app_apu;
+ default:
+ return false;
+ }
+}
+
+static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
+ struct amdgpu_ras_eeprom_control *control)
+{
+ struct atom_context *atom_ctx = adev->mode_info.atom_context;
+ u8 i2c_addr;
+
+ if (!control)
+ return false;
+
+ if (adev->bios && amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
+ /* The address given by VBIOS is an 8-bit, wire-format
+ * address, i.e. the most significant byte.
+ *
+ * Normalize it to a 19-bit EEPROM address. Remove the
+ * device type identifier and make it a 7-bit address;
+ * then make it a 19-bit EEPROM address. See top of
+ * amdgpu_eeprom.c.
+ */
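+ /* For example, a VBIOS value of 0xA8 (wire format of the
+ * 7-bit address 54h) gives (0xA8 & 0x0F) >> 1 = 0x4, so
+ * control->i2c_address = 0x4 << 16 = 0x40000, i.e.
+ * EEPROM_I2C_MADDR_4.
+ */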
+ i2c_addr = (i2c_addr & 0x0F) >> 1;
+ control->i2c_address = ((u32) i2c_addr) << 16;
+
+ return true;
+ }
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(11, 0, 2):
+ /* VEGA20 and ARCTURUS */
+ if (adev->asic_type == CHIP_VEGA20)
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else if (strnstr(atom_ctx->vbios_pn,
+ "D342",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ case IP_VERSION(11, 0, 7):
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ return true;
+ case IP_VERSION(13, 0, 2):
+ if (strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ return true;
+ case IP_VERSION(13, 0, 0):
+ if (strnstr(atom_ctx->vbios_pn, "D707",
+ sizeof(atom_ctx->vbios_pn)))
+ control->i2c_address = EEPROM_I2C_MADDR_0;
+ else
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ control->i2c_address = EEPROM_I2C_MADDR_4;
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void
+__encode_table_header_to_buf(struct amdgpu_ras_eeprom_table_header *hdr,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+
+ pp[0] = cpu_to_le32(hdr->header);
+ pp[1] = cpu_to_le32(hdr->version);
+ pp[2] = cpu_to_le32(hdr->first_rec_offset);
+ pp[3] = cpu_to_le32(hdr->tbl_size);
+ pp[4] = cpu_to_le32(hdr->checksum);
+}
+
+static void
+__decode_table_header_from_buf(struct amdgpu_ras_eeprom_table_header *hdr,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+
+ hdr->header = le32_to_cpu(pp[0]);
+ hdr->version = le32_to_cpu(pp[1]);
+ hdr->first_rec_offset = le32_to_cpu(pp[2]);
+ hdr->tbl_size = le32_to_cpu(pp[3]);
+ hdr->checksum = le32_to_cpu(pp[4]);
+}
+
+static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
+{
+ u8 buf[RAS_TABLE_HEADER_SIZE];
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int res;
+
+ memset(buf, 0, sizeof(buf));
+ __encode_table_header_to_buf(&control->tbl_hdr, buf);
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_header_offset,
+ buf, RAS_TABLE_HEADER_SIZE);
+ up_read(&adev->reset_domain->sem);
+
+ if (res < 0) {
+ dev_err(adev->dev, "Failed to write EEPROM table header:%d",
+ res);
+ } else if (res < RAS_TABLE_HEADER_SIZE) {
+ dev_err(adev->dev, "Short write:%d out of %d\n", res,
+ RAS_TABLE_HEADER_SIZE);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ return res;
+}
+
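+/* The first 4 bytes of the 256-byte RAS info block pack, in little endian,
+ * rma_status in bits 7:0, health_percent in bits 15:8 and ecc_page_threshold
+ * in bits 31:16; the remaining bytes are reserved.
+ */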
+static void
+__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+ u32 tmp;
+
+ tmp = ((uint32_t)(rai->rma_status) & 0xFF) |
+ (((uint32_t)(rai->health_percent) << 8) & 0xFF00) |
+ (((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000);
+ pp[0] = cpu_to_le32(tmp);
+}
+
+static void
+__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
+ unsigned char *buf)
+{
+ u32 *pp = (uint32_t *)buf;
+ u32 tmp;
+
+ tmp = le32_to_cpu(pp[0]);
+ rai->rma_status = tmp & 0xFF;
+ rai->health_percent = (tmp >> 8) & 0xFF;
+ rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF;
+}
+
+static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u8 *buf;
+ int res;
+
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "Failed to alloc buf to write table ras info\n");
+ return -ENOMEM;
+ }
+
+ __encode_table_ras_info_to_buf(&control->tbl_rai, buf);
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ up_read(&adev->reset_domain->sem);
+
+ if (res < 0) {
+ dev_err(adev->dev, "Failed to write EEPROM table ras info:%d",
+ res);
+ } else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ dev_err(adev->dev, "Short write:%d out of %d\n", res,
+ RAS_TABLE_V2_1_INFO_SIZE);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ kfree(buf);
+
+ return res;
+}
+
+static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
+{
+ int ii;
+ u8 *pp, csum;
+ size_t sz;
+
+ /* Header checksum, skip checksum field in the calculation */
+ sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum);
+ pp = (u8 *) &control->tbl_hdr;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
+
+ return csum;
+}
+
+static u8 __calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control)
+{
+ int ii;
+ u8 *pp, csum;
+ size_t sz;
+
+ sz = sizeof(control->tbl_rai);
+ pp = (u8 *) &control->tbl_rai;
+ csum = 0;
+ for (ii = 0; ii < sz; ii++, pp++)
+ csum += *pp;
+
+ return csum;
+}
+
+static int amdgpu_ras_eeprom_correct_header_tag(
+ struct amdgpu_ras_eeprom_control *control,
+ uint32_t header)
+{
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ u8 *hh;
+ int res;
+ u8 csum;
+
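+ /* The table checksum is chosen so that all table bytes, including
+ * the checksum itself, sum to 0 (mod 256). Recover the sum of the
+ * remaining bytes as -checksum, swap the old header tag bytes for
+ * the new ones, and negate again to get the new checksum.
+ */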
+ csum = -hdr->checksum;
+
+ hh = (void *) &hdr->header;
+ csum -= (hh[0] + hh[1] + hh[2] + hh[3]);
+ hh = (void *) &header;
+ csum += hh[0] + hh[1] + hh[2] + hh[3];
+ csum = -csum;
+ mutex_lock(&control->ras_tbl_mutex);
+ hdr->header = header;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
+static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 10, 0):
+ hdr->version = RAS_TABLE_VER_V2_1;
+ return;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 5, 0):
+ hdr->version = RAS_TABLE_VER_V3;
+ return;
+ default:
+ hdr->version = RAS_TABLE_VER_V1;
+ return;
+ }
+}
+
+/**
+ * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table
+ * @control: pointer to control structure
+ *
+ * Reset the contents of the header of the RAS EEPROM table.
+ * Return 0 on success, -errno on error.
+ */
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ u32 erase_res = 0;
+ u8 csum;
+ int res;
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ hdr->header = RAS_TABLE_HDR_VAL;
+ amdgpu_ras_set_eeprom_table_version(control);
+
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ hdr->first_rec_offset = RAS_RECORD_START_V2_1;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE;
+ rai->rma_status = GPU_HEALTH_USABLE;
+
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ /*
+ * GPU health represented as a percentage.
+ * 0 means worst health, 100 means fully healthy.
+ */
+ rai->health_percent = 100;
+ /* ecc_page_threshold = 0 means disable bad page retirement */
+ rai->ecc_page_threshold = con->bad_page_cnt_threshold;
+ } else {
+ hdr->first_rec_offset = RAS_RECORD_START;
+ hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
+
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ }
+
+ csum = __calc_hdr_byte_sum(control);
+ if (hdr->version >= RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ csum = -csum;
+ hdr->checksum = csum;
+ res = __write_table_header(control);
+ if (!res && hdr->version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
+ } else {
+ res = amdgpu_ras_smu_erase_ras_table(adev, &erase_res);
+ if (res || erase_res) {
+ dev_warn(adev->dev, "RAS EEPROM reset failed, res:%d result:%d",
+ res, erase_res);
+ if (!res)
+ res = -EIO;
+ }
+ }
+
+ control->ras_num_recs = 0;
+ control->ras_num_bad_pages = 0;
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+ control->ras_fri = 0;
+
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages);
+
+ control->bad_channel_bitmap = 0;
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
+ con->update_channel_flag = false;
+
+ amdgpu_ras_debugfs_set_ret_size(control);
+
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
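+/* On-EEPROM record layout, 24 bytes total (RAS_TABLE_RECORD_SIZE):
+ * err_type (1) | bank (1) | ts (8) | offset (6) | mem_channel (1) |
+ * mcumc_id (1) | retired_page (6), all multi-byte fields little endian.
+ */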
+static void
+__encode_table_record_to_buf(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ unsigned char *buf)
+{
+ __le64 tmp = 0;
+ int i = 0;
+
+ /* Next are all record fields according to EEPROM page spec in LE format */
+ buf[i++] = record->err_type;
+
+ buf[i++] = record->bank;
+
+ tmp = cpu_to_le64(record->ts);
+ memcpy(buf + i, &tmp, 8);
+ i += 8;
+
+ tmp = cpu_to_le64((record->offset & 0xffffffffffff));
+ memcpy(buf + i, &tmp, 6);
+ i += 6;
+
+ buf[i++] = record->mem_channel;
+ buf[i++] = record->mcumc_id;
+
+ tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
+ memcpy(buf + i, &tmp, 6);
+}
+
+static void
+__decode_table_record_from_buf(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ unsigned char *buf)
+{
+ __le64 tmp = 0;
+ int i = 0;
+
+ /* Next are all record fields according to EEPROM page spec in LE format */
+ record->err_type = buf[i++];
+
+ record->bank = buf[i++];
+
+ memcpy(&tmp, buf + i, 8);
+ record->ts = le64_to_cpu(tmp);
+ i += 8;
+
+ memcpy(&tmp, buf + i, 6);
+ record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
+ i += 6;
+
+ record->mem_channel = buf[i++];
+ record->mcumc_id = buf[i++];
+
+ memcpy(&tmp, buf + i, 6);
+ record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
+}
+
+bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (amdgpu_uniras_enabled(adev))
+ return amdgpu_ras_mgr_check_eeprom_safety_watermark(adev);
+
+ if (!__is_ras_eeprom_supported(adev) ||
+ !amdgpu_bad_page_threshold)
+ return false;
+
+ /* skip checking the eeprom table for VEGA20 Gaming */
+ if (!con || !(con->features & BIT(AMDGPU_RAS_BLOCK__UMC)))
+ return false;
+
+ if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
+ if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
+ con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n");
+ return false;
+ } else {
+ dev_warn(adev->dev,
+ "Please consider adjusting the customized threshold.\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * __amdgpu_ras_eeprom_write -- write indexed from buffer to EEPROM
+ * @control: pointer to control structure
+ * @buf: pointer to buffer containing data to write
+ * @fri: start writing at this index
+ * @num: number of records to write
+ *
+ * The caller must hold the table mutex in @control.
+ * Return 0 on success, -errno otherwise.
+ */
+static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
+ u8 *buf, const u32 fri, const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u32 buf_size;
+ int res;
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ buf_size = num * RAS_TABLE_RECORD_SIZE;
+ res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ RAS_INDEX_TO_OFFSET(control, fri),
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ dev_err(adev->dev, "Writing %d EEPROM table records error:%d",
+ num, res);
+ } else if (res < buf_size) {
+ /* Short write, return error.
+ */
+ dev_err(adev->dev, "Wrote %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ return res;
+}
+
+static int
+amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u32 a, b, i;
+ u8 *buf, *pp;
+ int res;
+
+ buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Encode all of them in one go.
+ */
+ pp = buf;
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
+ __encode_table_record_to_buf(control, &record[i], pp);
+
+ /* update bad channel bitmap */
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ con->update_channel_flag = true;
+ }
+ }
+
+ /* a, first record index to write into.
+ * b, last record index to write into.
+ * a = first index to read (fri) + number of records in the table,
+ * b = a + @num - 1.
+ * Let N = control->ras_max_num_record_count, then we have,
+ * case 0: 0 <= a <= b < N,
+ * just append @num records starting at a;
+ * case 1: 0 <= a < N <= b,
+ * append (N - a) records starting at a, and
+ * append the remainder, b % N + 1, starting at 0.
+ * case 2: 0 <= fri < N <= a <= b, then modulo N we get two subcases,
+ * case 2a: 0 <= a <= b < N
+ * append num records starting at a; and fix fri if b overwrote it,
+ * and since a <= b, if b overwrote it then a must've also,
+ * and if b didn't overwrite it, then a didn't also.
+ * case 2b: 0 <= b < a < N
+ * write num records starting at a, which wraps around 0=N
+ * and overwrite fri unconditionally. Now from case 2a,
+ * this means that b eclipsed fri to overwrite it and wrap
+ * around 0 again, i.e. b = 2N+r pre modulo N, so we unconditionally
+ * set fri = b + 1 (mod N).
+ * Now, since fri is updated in every case, except the trivial case 0,
+ * the number of records present in the table after writing, is,
+ * num_recs - 1 = b - fri (mod N), and we take the positive value,
+ * by adding an arbitrary multiple of N before taking the modulo N
+ * as shown below.
+ */
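+ /* Worked example with illustrative numbers: N = 10, fri = 0,
+ * ras_num_recs = 8, num = 4. Then a = 8, b = 11, so case 1 applies:
+ * g0 = 2 records are written at index 8 and g1 = 11 % 10 + 1 = 2 records
+ * at index 0; fri becomes 2 and ras_num_recs becomes
+ * 1 + (10 + 11 - 2) % 10 = 10, i.e. the table is full.
+ */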
+ a = control->ras_fri + control->ras_num_recs;
+ b = a + num - 1;
+ if (b < control->ras_max_record_count) {
+ res = __amdgpu_ras_eeprom_write(control, buf, a, num);
+ } else if (a < control->ras_max_record_count) {
+ u32 g0, g1;
+
+ g0 = control->ras_max_record_count - a;
+ g1 = b % control->ras_max_record_count + 1;
+ res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
+ if (res)
+ goto Out;
+ res = __amdgpu_ras_eeprom_write(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ if (g1 > control->ras_fri)
+ control->ras_fri = g1 % control->ras_max_record_count;
+ } else {
+ a %= control->ras_max_record_count;
+ b %= control->ras_max_record_count;
+
+ if (a <= b) {
+ /* Note that, b - a + 1 = num. */
+ res = __amdgpu_ras_eeprom_write(control, buf, a, num);
+ if (res)
+ goto Out;
+ if (b >= control->ras_fri)
+ control->ras_fri = (b + 1) % control->ras_max_record_count;
+ } else {
+ u32 g0, g1;
+
+ /* b < a, which means, we write from
+ * a to the end of the table, and from
+ * the start of the table to b.
+ */
+ g0 = control->ras_max_record_count - a;
+ g1 = b + 1;
+ res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
+ if (res)
+ goto Out;
+ res = __amdgpu_ras_eeprom_write(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ control->ras_fri = g1 % control->ras_max_record_count;
+ }
+ }
+ control->ras_num_recs = 1 + (control->ras_max_record_count + b
+ - control->ras_fri)
+ % control->ras_max_record_count;
+
+ /* older ASICs only save the physical address to EEPROM, as before */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12)
+ control->ras_num_pa_recs += num;
+ else
+ control->ras_num_mca_recs += num;
+
+ control->ras_num_bad_pages = con->bad_page_num;
+Out:
+ kfree(buf);
+ return res;
+}
+
+static int
+amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ u8 *buf, *pp, csum;
+ u32 buf_size;
+ int res;
+
+ /* Mark the table header bad if the saved bad page count
+ * exceeds the threshold.
+ */
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->ras_num_bad_pages > ras->bad_page_cnt_threshold) {
+ dev_warn(adev->dev,
+ "Saved bad pages %d reaches threshold value %d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+
+ if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) &&
+ amdgpu_cper_generate_bp_threshold_record(adev))
+ dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2)) {
+ control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) {
+ control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
+ control->tbl_rai.health_percent = 0;
+ }
+ ras->is_rma = true;
+ }
+
+ /* ignore the -ENOTSUPP return value */
+ amdgpu_dpm_send_rma_reason(adev);
+ }
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ control->tbl_hdr.checksum = 0;
+
+ buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "allocating memory for table of size %d bytes failed\n",
+ control->tbl_hdr.tbl_size);
+ res = -ENOMEM;
+ goto Out;
+ }
+
+ down_read(&adev->reset_domain->sem);
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_record_offset,
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ dev_err(adev->dev, "EEPROM failed reading records:%d\n", res);
+ goto Out;
+ } else if (res < buf_size) {
+ dev_err(adev->dev, "EEPROM read %d out of %d bytes\n", res,
+ buf_size);
+ res = -EIO;
+ goto Out;
+ }
+
+ /* Bad page records have been stored in EEPROM,
+ * now calculate the GPU health percent.
+ */
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 &&
+ control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
+ control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
+ control->ras_num_bad_pages) * 100) /
+ ras->bad_page_cnt_threshold;
+
+ /* Recalc the checksum.
+ */
+ csum = 0;
+ for (pp = buf; pp < buf + buf_size; pp++)
+ csum += *pp;
+
+ csum += __calc_hdr_byte_sum(control);
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ csum += __calc_ras_info_byte_sum(control);
+ /* avoid sign extension when assigning to "checksum" */
+ csum = -csum;
+ control->tbl_hdr.checksum = csum;
+ res = __write_table_header(control);
+ if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
+ res = __write_table_ras_info(control);
+Out:
+ kfree(buf);
+ return res;
+}
+
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int ret, retry = 20;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
+ control->ras_num_recs_old = control->ras_num_recs;
+
+ do {
+ /* 1000ms timeout is long enough, smu_get_badpage_count won't
+ * return -EBUSY before timeout.
+ */
+ ret = amdgpu_ras_smu_get_badpage_count(adev,
+ &(control->ras_num_recs), RAS_SMU_MESSAGE_TIMEOUT_MS);
+ if (!ret &&
+ (control->ras_num_recs_old == control->ras_num_recs)) {
+ /* record number update in PMFW needs some time,
+ * smu_get_badpage_count may return immediately without
+ * count update, sleep for a while and retry again.
+ */
+ msleep(50);
+ retry--;
+ } else {
+ break;
+ }
+ } while (retry);
+
+ /* No update of the record number is not a hard failure;
+ * return -EINVAL to the caller but don't print a warning here.
+ */
+ if (!ret && (control->ras_num_recs_old == control->ras_num_recs))
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static int amdgpu_ras_smu_eeprom_append(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev) || !con)
+ return 0;
+
+ control->ras_num_bad_pages = con->bad_page_num;
+
+ if (amdgpu_bad_page_threshold != 0 &&
+ control->ras_num_bad_pages > con->bad_page_cnt_threshold) {
+ dev_warn(adev->dev,
+ "Saved bad pages %d reaches threshold value %d\n",
+ control->ras_num_bad_pages, con->bad_page_cnt_threshold);
+
+ if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
+ dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
+
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2))
+ con->is_rma = true;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
+ * @control: pointer to control structure
+ * @record: array of records to append
+ * @num: number of records in @record array
+ *
+ * Append @num records to the table, calculate the checksum and write
+ * the table back to EEPROM. The number of records appended in a
+ * single call must be between 1 and control->ras_max_record_count,
+ * regardless of how many records are already stored in the table.
+ *
+ * Return 0 on success or if EEPROM is not supported, -errno on error.
+ */
+int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int res, i;
+ uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_append(control);
+
+ if (num == 0) {
+ dev_err(adev->dev, "will not append 0 records\n");
+ return -EINVAL;
+ } else if (num > control->ras_max_record_count) {
+ dev_err(adev->dev,
+ "cannot append %d records than the size of table %d\n",
+ num, control->ras_max_record_count);
+ return -EINVAL;
+ }
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* set the new channel index flag */
+ for (i = 0; i < num; i++)
+ record[i].retired_page |= (nps << UMC_NPS_SHIFT);
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ res = amdgpu_ras_eeprom_append_table(control, record, num);
+ if (!res)
+ res = amdgpu_ras_eeprom_update_header(control);
+ if (!res)
+ amdgpu_ras_debugfs_set_ret_size(control);
+
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ /* clear channel index flag, the flag is only saved on eeprom */
+ for (i = 0; i < num; i++)
+ record[i].retired_page &= ~(nps << UMC_NPS_SHIFT);
+
+ return res;
+}
+
+/**
+ * __amdgpu_ras_eeprom_read -- read indexed from EEPROM into buffer
+ * @control: pointer to control structure
+ * @buf: pointer to buffer to read into
+ * @fri: first record index, start reading at this index, absolute index
+ * @num: number of records to read
+ *
+ * The caller must hold the table mutex in @control.
+ * Return 0 on success, -errno otherwise.
+ */
+static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ u8 *buf, const u32 fri, const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ u32 buf_size;
+ int res;
+
+ /* i2c may be unstable in gpu reset */
+ down_read(&adev->reset_domain->sem);
+ buf_size = num * RAS_TABLE_RECORD_SIZE;
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ RAS_INDEX_TO_OFFSET(control, fri),
+ buf, buf_size);
+ up_read(&adev->reset_domain->sem);
+ if (res < 0) {
+ dev_err(adev->dev, "Reading %d EEPROM table records error:%d",
+ num, res);
+ } else if (res < buf_size) {
+ /* Short read, return error.
+ */
+ dev_err(adev->dev, "Read %d records out of %d",
+ res / RAS_TABLE_RECORD_SIZE, num);
+ res = -EIO;
+ } else {
+ res = 0;
+ }
+
+ return res;
+}
+
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ uint64_t ts, end_idx;
+ int i, ret;
+ u64 mca, ipid;
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
+ if (!adev->umc.ras || !adev->umc.ras->mca_ipid_parse)
+ return -EOPNOTSUPP;
+
+ end_idx = rec_idx + num;
+ for (i = rec_idx; i < end_idx; i++) {
+ ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, i, &mca);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_smu_get_badpage_ipid(adev, i, &ipid);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_ras_smu_get_timestamp(adev, i, &ts);
+ if (ret)
+ return ret;
+
+ record[i - rec_idx].address = mca;
+ /* retired_page (pa) is unused now */
+ record[i - rec_idx].retired_page = 0x1ULL;
+ record[i - rec_idx].ts = ts;
+ record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+
+ adev->umc.ras->mca_ipid_parse(adev, ipid,
+ (uint32_t *)&(record[i - rec_idx].cu),
+ (uint32_t *)&(record[i - rec_idx].mem_channel),
+ (uint32_t *)&(record[i - rec_idx].mcumc_id), NULL);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_ras_eeprom_read -- read EEPROM
+ * @control: pointer to control structure
+ * @record: array of records to read into
+ * @num: number of records in @record
+ *
+ * Reads num records from the RAS table in EEPROM and
+ * writes the data into @record array.
+ *
+ * Returns 0 on success, -errno on error.
+ */
+int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record,
+ const u32 num)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int i, res;
+ u8 *buf, *pp;
+ u32 g0, g1;
+
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_eeprom_read_idx(control, record, 0, num);
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ if (num == 0) {
+ dev_err(adev->dev, "will not read 0 records\n");
+ return -EINVAL;
+ } else if (num > control->ras_num_recs) {
+ dev_err(adev->dev, "too many records to read:%d available:%d\n",
+ num, control->ras_num_recs);
+ return -EINVAL;
+ }
+
+ buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Determine how many records to read, from the first record
+ * index, fri, to the end of the table, and from the beginning
+ * of the table, such that the total number of records is
+ * @num, and we handle wrap around when fri > 0 and
+ * fri + num > RAS_MAX_RECORD_COUNT.
+ *
+ * First we compute the index of the last element
+ * which would be fetched from each region,
+ * g0 is in [fri, fri + num - 1], and
+ * g1 is in [0, RAS_MAX_RECORD_COUNT - 1].
+ * Then, if g0, the index of the last element to fetch,
+ * is less than RAS_MAX_RECORD_COUNT, we set g0 to _the number_
+ * of elements to fetch, @num, since we know that the last
+ * index to be fetched does not exceed the table.
+ *
+ * If, however, g0 >= RAS_MAX_RECORD_COUNT, then
+ * we set g0 to the number of elements to read
+ * until the end of the table, and g1 to the number of
+ * elements to read from the beginning of the table.
+ */
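+ /* For example, with ras_max_record_count = 10, ras_fri = 7 and num = 5:
+ * g0 = 7 + 5 - 1 = 11 >= 10, so g0 becomes 10 - 7 = 3 and
+ * g1 becomes 11 % 10 + 1 = 2; we read 3 records from index 7
+ * and 2 records from index 0.
+ */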
+ g0 = control->ras_fri + num - 1;
+ g1 = g0 % control->ras_max_record_count;
+ if (g0 < control->ras_max_record_count) {
+ g0 = num;
+ g1 = 0;
+ } else {
+ g0 = control->ras_max_record_count - control->ras_fri;
+ g1 += 1;
+ }
+
+ mutex_lock(&control->ras_tbl_mutex);
+ res = __amdgpu_ras_eeprom_read(control, buf, control->ras_fri, g0);
+ if (res)
+ goto Out;
+ if (g1) {
+ res = __amdgpu_ras_eeprom_read(control,
+ buf + g0 * RAS_TABLE_RECORD_SIZE,
+ 0, g1);
+ if (res)
+ goto Out;
+ }
+
+ res = 0;
+
+ /* Read up everything? Then transform.
+ */
+ pp = buf;
+ for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
+ __decode_table_record_from_buf(control, &record[i], pp);
+
+ /* update bad channel bitmap */
+ if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) &&
+ !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
+ control->bad_channel_bitmap |= 1 << record[i].mem_channel;
+ con->update_channel_flag = true;
+ }
+ }
+Out:
+ kfree(buf);
+ mutex_unlock(&control->ras_tbl_mutex);
+
+ return res;
+}
+
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
+{
+ /* get available eeprom table version first before eeprom table init */
+ amdgpu_ras_set_eeprom_table_version(control);
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ return RAS_MAX_RECORD_COUNT_V2_1;
+ else
+ return RAS_MAX_RECORD_COUNT;
+}
+
+static ssize_t
+amdgpu_ras_debugfs_eeprom_size_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
+ u8 data[50];
+ int res;
+
+ if (!size)
+ return size;
+
+ if (!ras || !control) {
+ res = snprintf(data, sizeof(data), "Not supported\n");
+ } else {
+ res = snprintf(data, sizeof(data), "%d bytes or %d records\n",
+ RAS_TBL_SIZE_BYTES, control->ras_max_record_count);
+ }
+
+ if (*pos >= res)
+ return 0;
+
+ res -= *pos;
+ res = min_t(size_t, res, size);
+
+ if (copy_to_user(buf, &data[*pos], res))
+ return -EFAULT;
+
+ *pos += res;
+
+ return res;
+}
+
+const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ras_debugfs_eeprom_size_read,
+ .write = NULL,
+ .llseek = default_llseek,
+};
+
+static const char *tbl_hdr_str = " Signature Version FirstOffs Size Checksum\n";
+static const char *tbl_hdr_fmt = "0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\n";
+#define tbl_hdr_fmt_size (5 * (2+8) + 4 + 1)
+static const char *rec_hdr_str = "Index Offset ErrType Bank/CU TimeStamp Offs/Addr MemChl MCUMCID RetiredPage\n";
+static const char *rec_hdr_fmt = "%5d 0x%05X %7s 0x%02X 0x%016llX 0x%012llX 0x%02X 0x%02X 0x%012llX\n";
+#define rec_hdr_fmt_size (5 + 1 + 7 + 1 + 7 + 1 + 7 + 1 + 18 + 1 + 14 + 1 + 6 + 1 + 7 + 1 + 14 + 1)
+
+static const char *record_err_type_str[AMDGPU_RAS_EEPROM_ERR_COUNT] = {
+ "ignore",
+ "re",
+ "ue",
+};
+
+static loff_t amdgpu_ras_debugfs_table_size(struct amdgpu_ras_eeprom_control *control)
+{
+ return strlen(tbl_hdr_str) + tbl_hdr_fmt_size +
+ strlen(rec_hdr_str) + rec_hdr_fmt_size * control->ras_num_recs;
+}
+
+void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_ras *ras = container_of(control, struct amdgpu_ras,
+ eeprom_control);
+ struct dentry *de = ras->de_ras_eeprom_table;
+
+ if (de)
+ d_inode(de)->i_size = amdgpu_ras_debugfs_table_size(control);
+}
+
+static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = &ras->eeprom_control;
+ const size_t orig_size = size;
+ int res = -EFAULT;
+ size_t data_len;
+
+ /* pmfw manages eeprom data by itself */
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return 0;
+
+ mutex_lock(&control->ras_tbl_mutex);
+
+ /* Each section below is printed only while *pos < data_len,
+ * i.e. while there are still bytes of that section left to emit.
+ */
+ data_len = strlen(tbl_hdr_str);
+ if (*pos < data_len) {
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ if (copy_to_user(buf, &tbl_hdr_str[*pos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
+
+ data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size;
+ if (*pos < data_len && size > 0) {
+ u8 data[tbl_hdr_fmt_size + 1];
+ loff_t lpos;
+
+ snprintf(data, sizeof(data), tbl_hdr_fmt,
+ control->tbl_hdr.header,
+ control->tbl_hdr.version,
+ control->tbl_hdr.first_rec_offset,
+ control->tbl_hdr.tbl_size,
+ control->tbl_hdr.checksum);
+
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ lpos = *pos - strlen(tbl_hdr_str);
+ if (copy_to_user(buf, &data[lpos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
+
+ data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size + strlen(rec_hdr_str);
+ if (*pos < data_len && size > 0) {
+ loff_t lpos;
+
+ data_len -= *pos;
+ data_len = min_t(size_t, data_len, size);
+ lpos = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size;
+ if (copy_to_user(buf, &rec_hdr_str[lpos], data_len))
+ goto Out;
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ }
+
+ data_len = amdgpu_ras_debugfs_table_size(control);
+ if (*pos < data_len && size > 0) {
+ u8 dare[RAS_TABLE_RECORD_SIZE];
+ u8 data[rec_hdr_fmt_size + 1];
+ struct eeprom_table_record record;
+ int s, r;
+
+ /* Find the starting record index
+ */
+ s = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
+ strlen(rec_hdr_str);
+ s = s / rec_hdr_fmt_size;
+ r = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
+ strlen(rec_hdr_str);
+ r = r % rec_hdr_fmt_size;
+
+ for ( ; size > 0 && s < control->ras_num_recs; s++) {
+ u32 ai = RAS_RI_TO_AI(control, s);
+ /* Read a single record
+ */
+ res = __amdgpu_ras_eeprom_read(control, dare, ai, 1);
+ if (res)
+ goto Out;
+ __decode_table_record_from_buf(control, &record, dare);
+ snprintf(data, sizeof(data), rec_hdr_fmt,
+ s,
+ RAS_INDEX_TO_OFFSET(control, ai),
+ record_err_type_str[record.err_type],
+ record.bank,
+ record.ts,
+ record.offset,
+ record.mem_channel,
+ record.mcumc_id,
+ record.retired_page);
+
+ data_len = min_t(size_t, rec_hdr_fmt_size - r, size);
+ if (copy_to_user(buf, &data[r], data_len)) {
+ res = -EFAULT;
+ goto Out;
+ }
+ buf += data_len;
+ size -= data_len;
+ *pos += data_len;
+ r = 0;
+ }
+ }
+ res = 0;
+Out:
+ mutex_unlock(&control->ras_tbl_mutex);
+ return res < 0 ? res : orig_size - size;
+}
+
+static ssize_t
+amdgpu_ras_debugfs_eeprom_table_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
+ u8 data[81];
+ int res;
+
+ if (!size)
+ return size;
+
+ if (!ras || !control) {
+ res = snprintf(data, sizeof(data), "Not supported\n");
+ if (*pos >= res)
+ return 0;
+
+ res -= *pos;
+ res = min_t(size_t, res, size);
+
+ if (copy_to_user(buf, &data[*pos], res))
+ return -EFAULT;
+
+ *pos += res;
+
+ return res;
+ } else {
+ return amdgpu_ras_debugfs_table_read(f, buf, size, pos);
+ }
+}
+
+const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ras_debugfs_eeprom_table_read,
+ .write = NULL,
+ .llseek = default_llseek,
+};
+
+/**
+ * __verify_ras_table_checksum -- verify the RAS EEPROM table checksum
+ * @control: pointer to control structure
+ *
+ * Check the checksum of the RAS table stored in EEPROM.
+ *
+ * Return 0 if the checksum is correct,
+ * positive if it is not correct, and
+ * -errno on I/O error.
+ */
+static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ int buf_size, res;
+ u8 csum, *buf, *pp;
+
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ RAS_TABLE_V2_1_INFO_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+ else
+ buf_size = RAS_TABLE_HEADER_SIZE +
+ control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
+
+ buf = kzalloc(buf_size, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "Out of memory checking RAS table checksum.\n");
+ return -ENOMEM;
+ }
+
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address +
+ control->ras_header_offset,
+ buf, buf_size);
+ if (res < buf_size) {
+ dev_err(adev->dev, "Partial read for checksum, res:%d\n", res);
+ /* On partial reads, return -EIO.
+ */
+ if (res >= 0)
+ res = -EIO;
+ goto Out;
+ }
+
+ csum = 0;
+ for (pp = buf; pp < buf + buf_size; pp++)
+ csum += *pp;
+Out:
+ kfree(buf);
+ return res < 0 ? res : csum;
+}
+
+static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char *buf;
+ int res;
+
+ buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
+ if (!buf) {
+ dev_err(adev->dev,
+ "Failed to alloc buf to read EEPROM table ras info\n");
+ return -ENOMEM;
+ }
+
+ /* EEPROM table V2_1 supports RAS info,
+ * read the EEPROM table RAS info block.
+ */
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address + control->ras_info_offset,
+ buf, RAS_TABLE_V2_1_INFO_SIZE);
+ if (res < RAS_TABLE_V2_1_INFO_SIZE) {
+ dev_err(adev->dev,
+ "Failed to read EEPROM table ras info, res:%d", res);
+ res = res >= 0 ? -EIO : res;
+ goto Out;
+ }
+
+ __decode_table_ras_info_from_buf(rai, buf);
+
+Out:
+ kfree(buf);
+ return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
+}
+
+static int amdgpu_ras_smu_eeprom_init(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ uint64_t local_time;
+ int res;
+
+ ras->is_rma = false;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+ mutex_init(&control->ras_tbl_mutex);
+
+ res = amdgpu_ras_smu_get_table_version(adev, &(hdr->version));
+ if (res)
+ return res;
+
+ res = amdgpu_ras_smu_get_badpage_count(adev,
+ &(control->ras_num_recs), 100);
+ if (res)
+ return res;
+
+ local_time = (uint64_t)ktime_get_real_seconds();
+ res = amdgpu_ras_smu_set_timestamp(adev, local_time);
+ if (res)
+ return res;
+
+ control->ras_max_record_count = 4000;
+
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+
+ return 0;
+}
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res;
+
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_init(control);
+
+ ras->is_rma = false;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
+
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ control->ras_header_offset = RAS_HDR_START;
+ control->ras_info_offset = RAS_TABLE_V2_1_INFO_START;
+ mutex_init(&control->ras_tbl_mutex);
+
+ /* Read the table header from EEPROM address */
+ res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
+ control->i2c_address + control->ras_header_offset,
+ buf, RAS_TABLE_HEADER_SIZE);
+ if (res < RAS_TABLE_HEADER_SIZE) {
+ dev_err(adev->dev, "Failed to read EEPROM table header, res:%d",
+ res);
+ return res >= 0 ? -EIO : res;
+ }
+
+ __decode_table_header_from_buf(hdr, buf);
+
+ if (hdr->header != RAS_TABLE_HDR_VAL &&
+ hdr->header != RAS_TABLE_HDR_BAD) {
+ dev_info(adev->dev, "Creating a new EEPROM table");
+ return amdgpu_ras_eeprom_reset_table(control);
+ }
+
+ switch (hdr->version) {
+ case RAS_TABLE_VER_V2_1:
+ case RAS_TABLE_VER_V3:
+ control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
+ control->ras_record_offset = RAS_RECORD_START_V2_1;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
+ break;
+ case RAS_TABLE_VER_V1:
+ control->ras_num_recs = RAS_NUM_RECS(hdr);
+ control->ras_record_offset = RAS_RECORD_START;
+ control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ break;
+ default:
+ dev_err(adev->dev,
+ "RAS header invalid, unsupported version: %u",
+ hdr->version);
+ return -EINVAL;
+ }
+
+ if (control->ras_num_recs > control->ras_max_record_count) {
+ dev_err(adev->dev,
+ "RAS header invalid, records in header: %u max allowed :%u",
+ control->ras_num_recs, control->ras_max_record_count);
+ return -EINVAL;
+ }
+
+ control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+ return 0;
+}
+
+static int amdgpu_ras_smu_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ control->ras_num_bad_pages = ras->bad_page_num;
+
+ if ((ras->bad_page_cnt_threshold < control->ras_num_bad_pages) &&
+ amdgpu_bad_page_threshold != 0) {
+ dev_warn(adev->dev,
+ "RAS records:%d exceed threshold:%d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
+ } else {
+ ras->is_rma = true;
+ dev_warn(adev->dev,
+ "User defined threshold is set, runtime service will be halt when threshold is reached\n");
+ }
+
+ return 0;
+ }
+
+ dev_dbg(adev->dev,
+ "Found existing EEPROM table with %d records",
+ control->ras_num_bad_pages);
+
+ /* Warn if we are at 90% of the threshold or above
+ */
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
+ control->ras_num_bad_pages,
+ ras->bad_page_cnt_threshold);
+ return 0;
+}
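The 90% warning above relies on integer arithmetic: 10 * pages >= 9 * threshold is equivalent to pages >= 0.9 * threshold without floating point. A standalone restatement (hypothetical helper, not driver code):

#include <stdbool.h>
#include <stdint.h>

/* True once bad_pages has reached 90% of the threshold, without floats. */
static bool near_bad_page_threshold(uint32_t bad_pages, uint32_t threshold)
{
        /* 64-bit intermediates so large counts cannot overflow. */
        return (uint64_t)10 * bad_pages >= (uint64_t)9 * threshold;
}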
+
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res = 0;
+
+ if (amdgpu_ras_smu_eeprom_supported(adev))
+ return amdgpu_ras_smu_eeprom_check(control);
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
+
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ control->ras_num_bad_pages = ras->bad_page_num;
+
+ if (hdr->header == RAS_TABLE_HDR_VAL) {
+ dev_dbg(adev->dev,
+ "Found existing EEPROM table with %d records",
+ control->ras_num_bad_pages);
+
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
+ res = __verify_ras_table_checksum(control);
+ if (res)
+ dev_err(adev->dev,
+ "RAS table incorrect checksum or error:%d\n",
+ res);
+
+ /* Warn if we are at 90% of the threshold or above
+ */
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
+ dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
+ control->ras_num_bad_pages,
+ ras->bad_page_cnt_threshold);
+ } else if (hdr->header == RAS_TABLE_HDR_BAD &&
+ amdgpu_bad_page_threshold != 0) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
+ res = __read_table_ras_info(control);
+ if (res)
+ return res;
+ }
+
+ res = __verify_ras_table_checksum(control);
+ if (res) {
+ dev_err(adev->dev,
+ "RAS Table incorrect checksum or error:%d\n",
+ res);
+ return -EINVAL;
+ }
+ if (ras->bad_page_cnt_threshold >= control->ras_num_bad_pages) {
+ /* The threshold was increased since the last time the system
+ * was booted, so ras->bad_page_cnt_threshold - control->num_recs > 0
+ * and at least one more record can be saved before the page
+ * count threshold is reached.
+ */
+ dev_info(adev->dev,
+ "records:%d threshold:%d, resetting "
+ "RAS table header signature",
+ control->ras_num_bad_pages,
+ ras->bad_page_cnt_threshold);
+ res = amdgpu_ras_eeprom_correct_header_tag(control,
+ RAS_TABLE_HDR_VAL);
+ } else {
+ dev_warn(adev->dev,
+ "RAS records:%d exceed threshold:%d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
+ res = 0;
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
+ } else {
+ ras->is_rma = true;
+ dev_warn(adev->dev,
+ "User defined threshold is set, runtime service will be halt when threshold is reached\n");
+ }
+ }
+ }
+
+ return res < 0 ? res : 0;
+}
+
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control;
+ int res;
+
+ if (!__is_ras_eeprom_supported(adev) || !ras ||
+ amdgpu_ras_smu_eeprom_supported(adev))
+ return;
+ control = &ras->eeprom_control;
+ if (!control->is_eeprom_valid)
+ return;
+ res = __verify_ras_table_checksum(control);
+ if (res) {
+ dev_warn(adev->dev,
+ "RAS table incorrect checksum or error:%d, try to recover\n",
+ res);
+ if (!amdgpu_ras_eeprom_reset_table(control))
+ if (!amdgpu_ras_save_bad_pages(adev, NULL))
+ if (!__verify_ras_table_checksum(control)) {
+ dev_info(adev->dev, "RAS table recovery succeed\n");
+ return;
+ }
+ dev_err(adev->dev, "RAS table recovery failed\n");
+ control->is_eeprom_valid = false;
+ }
+ return;
+}
+
+static const struct ras_smu_drv *amdgpu_ras_get_smu_ras_drv(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!ras)
+ return NULL;
+
+ return ras->ras_smu_drv;
+}
+
+static uint64_t amdgpu_ras_smu_get_feature_flags(struct amdgpu_device *adev)
+{
+ const struct ras_smu_drv *ras_smu_drv = amdgpu_ras_get_smu_ras_drv(adev);
+ uint64_t flags = 0ULL;
+
+ if (!ras_smu_drv)
+ goto out;
+
+ if (ras_smu_drv->ras_smu_feature_flags)
+ ras_smu_drv->ras_smu_feature_flags(adev, &flags);
+
+out:
+ return flags;
+}
+
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+ uint64_t flags = 0ULL;
+
+ if (!__is_ras_eeprom_supported(adev) || !smu_ras_drv)
+ return false;
+
+ if (!smu_ras_drv->smu_eeprom_funcs)
+ return false;
+
+ flags = amdgpu_ras_smu_get_feature_flags(adev);
+
+ return !!(flags & RAS_SMU_FEATURE_BIT__RAS_EEPROM);
+}
+
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+ uint32_t *table_version)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_ras_table_version)
+ return smu_ras_drv->smu_eeprom_funcs->get_ras_table_version(adev,
+ table_version);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+ uint32_t *count, uint32_t timeout)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_count)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_count(adev,
+ count, timeout);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *mca_addr)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_mca_addr)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_mca_addr(adev,
+ index, mca_addr);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+ uint64_t timestamp)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->set_timestamp)
+ return smu_ras_drv->smu_eeprom_funcs->set_timestamp(adev,
+ timestamp);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_timestamp)
+ return smu_ras_drv->smu_eeprom_funcs->get_timestamp(adev,
+ index, timestamp);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *ipid)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->get_badpage_ipid)
+ return smu_ras_drv->smu_eeprom_funcs->get_badpage_ipid(adev,
+ index, ipid);
+ return -EOPNOTSUPP;
+}
+
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+ uint32_t *result)
+{
+ const struct ras_smu_drv *smu_ras_drv = amdgpu_ras_get_smu_ras_drv(adev);
+
+ if (!amdgpu_ras_smu_eeprom_supported(adev))
+ return -EOPNOTSUPP;
+
+ if (smu_ras_drv->smu_eeprom_funcs->erase_ras_table)
+ return smu_ras_drv->smu_eeprom_funcs->erase_ras_table(adev,
+ result);
+ return -EOPNOTSUPP;
+}
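The amdgpu_ras_smu_* wrappers above all follow the same pattern: an optional ops table whose individual callbacks may be NULL, with -EOPNOTSUPP returned when either the feature or the specific callback is missing. A compact standalone sketch of that dispatch shape (struct and function names are made up for illustration):

#include <errno.h>
#include <stddef.h>
#include <stdint.h>

struct eeprom_ops {
        int (*get_table_version)(void *ctx, uint32_t *version);
};

/* Dispatch through an optional callback, falling back to -EOPNOTSUPP. */
static int get_table_version(const struct eeprom_ops *ops, void *ctx,
                             uint32_t *version)
{
        if (ops && ops->get_table_version)
                return ops->get_table_version(ctx, version);
        return -EOPNOTSUPP;
}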
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
new file mode 100644
index 000000000000..2e5d63957e71
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_RAS_EEPROM_H
+#define _AMDGPU_RAS_EEPROM_H
+
+#include <linux/i2c.h>
+
+#define RAS_TABLE_VER_V1 0x00010000
+#define RAS_TABLE_VER_V2_1 0x00021000
+#define RAS_TABLE_VER_V3 0x00030000
+
+struct amdgpu_device;
+
+enum amdgpu_ras_gpu_health_status {
+ GPU_HEALTH_USABLE = 0,
+ GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
+};
+
+enum amdgpu_ras_eeprom_err_type {
+ AMDGPU_RAS_EEPROM_ERR_NA,
+ AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
+ AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE,
+ AMDGPU_RAS_EEPROM_ERR_COUNT,
+};
+
+struct amdgpu_ras_eeprom_table_header {
+ uint32_t header;
+ uint32_t version;
+ uint32_t first_rec_offset;
+ uint32_t tbl_size;
+ uint32_t checksum;
+} __packed;
+
+struct amdgpu_ras_eeprom_table_ras_info {
+ u8 rma_status;
+ u8 health_percent;
+ u16 ecc_page_threshold;
+ u32 padding[64 - 1];
+} __packed;
+
+struct amdgpu_ras_eeprom_control {
+ struct amdgpu_ras_eeprom_table_header tbl_hdr;
+
+ struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
+
+ /* Base I2C EEPROM 19-bit memory address,
+ * where the table is located. For more information,
+ * see top of amdgpu_eeprom.c.
+ */
+ u32 i2c_address;
+
+ /* The byte offset from @i2c_address
+ * at which the table header is found,
+ * and where the records start (always
+ * right after the header).
+ */
+ u32 ras_header_offset;
+ u32 ras_info_offset;
+ u32 ras_record_offset;
+
+ /* Number of records in the table.
+ */
+ u32 ras_num_recs;
+ u32 ras_num_recs_old;
+
+ /* The bad page count is either ras_num_recs or
+ * ras_num_recs * umc.retire_unit.
+ */
+ u32 ras_num_bad_pages;
+
+ /* Number of records that store an MCA address */
+ u32 ras_num_mca_recs;
+
+ /* Number of records that store a physical address */
+ u32 ras_num_pa_recs;
+
+ /* First record index to read, 0-based.
+ * Range is [0, num_recs-1]. This is
+ * an absolute index, starting right after
+ * the table header.
+ */
+ u32 ras_fri;
+
+ /* Maximum possible number of records
+ * we could store, i.e. the maximum capacity
+ * of the table.
+ */
+ u32 ras_max_record_count;
+
+ /* Protect table access via this mutex.
+ */
+ struct mutex ras_tbl_mutex;
+
+ /* Bitmap recording the channels on which bad pages occurred
+ */
+ u32 bad_channel_bitmap;
+
+ bool is_eeprom_valid;
+};
+
+/*
+ * Represents a single table record. Packed so it can be easily serialized
+ * into a byte stream.
+ */
+struct eeprom_table_record {
+
+ union {
+ uint64_t address;
+ uint64_t offset;
+ };
+
+ uint64_t retired_page;
+ uint64_t ts;
+
+ enum amdgpu_ras_eeprom_err_type err_type;
+
+ union {
+ unsigned char bank;
+ unsigned char cu;
+ };
+
+ unsigned char mem_channel;
+ unsigned char mcumc_id;
+} __packed;
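Since the header, RAS info block and record structures above appear to be __packed mirrors of a fixed on-EEPROM layout, their sizes should simply be the sum of their members. A standalone compile-time check of the two fixed-size blocks (the mirror types below are illustrative only):

#include <stdint.h>

struct hdr_mirror {
        uint32_t header, version, first_rec_offset, tbl_size, checksum;
} __attribute__((packed));

struct rai_mirror {
        uint8_t rma_status;
        uint8_t health_percent;
        uint16_t ecc_page_threshold;
        uint32_t padding[64 - 1];
} __attribute__((packed));

/* 5 * 4 bytes for the header; 1 + 1 + 2 + 63 * 4 = 256 bytes for ras info. */
_Static_assert(sizeof(struct hdr_mirror) == 20, "header is 20 bytes");
_Static_assert(sizeof(struct rai_mirror) == 256, "ras info is 256 bytes");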
+
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
+
+int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
+
+bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev);
+
+int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, const u32 num);
+
+int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *records, const u32 num);
+
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
+
+void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
+
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
+
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev);
+
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev);
+
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+ uint32_t *table_version);
+
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+ uint32_t *count, uint32_t timeout);
+
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *mca_addr);
+
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+ uint64_t timestamp);
+
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *ipid);
+
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+ uint32_t *result);
+
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num);
+
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control);
+
+extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
+extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
+
+#endif // _AMDGPU_RAS_EEPROM_H
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
new file mode 100644
index 000000000000..be2e56ce1355
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#ifndef __AMDGPU_RES_CURSOR_H__
+#define __AMDGPU_RES_CURSOR_H__
+
+#include <drm/drm_mm.h>
+#include <drm/ttm/ttm_resource.h>
+#include <drm/ttm/ttm_range_manager.h>
+
+#include "amdgpu_vram_mgr.h"
+
+/* Cursor state for walking over vram_mgr and gtt_mgr allocations */
+struct amdgpu_res_cursor {
+ uint64_t start;
+ uint64_t size;
+ uint64_t remaining;
+ void *node;
+ uint32_t mem_type;
+};
+
+/**
+ * amdgpu_res_first - initialize a amdgpu_res_cursor
+ *
+ * @res: TTM resource object to walk
+ * @start: Start of the range
+ * @size: Size of the range
+ * @cur: cursor object to initialize
+ *
+ * Start walking over the range of allocations between @start and @size.
+ */
+static inline void amdgpu_res_first(struct ttm_resource *res,
+ uint64_t start, uint64_t size,
+ struct amdgpu_res_cursor *cur)
+{
+ struct drm_buddy_block *block;
+ struct list_head *head, *next;
+ struct drm_mm_node *node;
+
+ if (!res)
+ goto fallback;
+
+ BUG_ON(start + size > res->size);
+
+ cur->mem_type = res->mem_type;
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ head = &to_amdgpu_vram_mgr_resource(res)->blocks;
+
+ block = list_first_entry_or_null(head,
+ struct drm_buddy_block,
+ link);
+ if (!block)
+ goto fallback;
+
+ while (start >= amdgpu_vram_mgr_block_size(block)) {
+ start -= amdgpu_vram_mgr_block_size(block);
+
+ next = block->link.next;
+ if (next != head)
+ block = list_entry(next, struct drm_buddy_block, link);
+ }
+
+ cur->start = amdgpu_vram_mgr_block_start(block) + start;
+ cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size);
+ cur->remaining = size;
+ cur->node = block;
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
+ node = to_ttm_range_mgr_node(res)->mm_nodes;
+ while (start >= node->size << PAGE_SHIFT)
+ start -= node++->size << PAGE_SHIFT;
+
+ cur->start = (node->start << PAGE_SHIFT) + start;
+ cur->size = min((node->size << PAGE_SHIFT) - start, size);
+ cur->remaining = size;
+ cur->node = node;
+ break;
+ default:
+ goto fallback;
+ }
+
+ return;
+
+fallback:
+ cur->start = start;
+ cur->size = size;
+ cur->remaining = size;
+ cur->node = NULL;
+ WARN_ON(res && start + size > res->size);
+}
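A simplified, standalone model of the setup above: whole extents are consumed until @start falls inside one, and the first chunk is clamped to whatever remains of that extent (the plain extent array here stands in for the buddy-block list or drm_mm nodes):

#include <stddef.h>
#include <stdint.h>

struct extent { uint64_t start, size; };
struct cursor { uint64_t start, size, remaining; size_t idx; };

static void cursor_first(const struct extent *ext, size_t n,
                         uint64_t start, uint64_t size, struct cursor *cur)
{
        size_t i = 0;

        /* Skip whole extents that lie entirely before @start. */
        while (i + 1 < n && start >= ext[i].size)
                start -= ext[i++].size;

        cur->idx = i;
        cur->start = ext[i].start + start;
        cur->size = (ext[i].size - start) < size ? ext[i].size - start : size;
        cur->remaining = size;
}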
+
+/**
+ * amdgpu_res_next - advance the cursor
+ *
+ * @cur: the cursor to advance
+ * @size: number of bytes to move forward
+ *
+ * Move the cursor @size bytes forward, walking to the next node if necessary.
+ */
+static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
+{
+ struct drm_buddy_block *block;
+ struct drm_mm_node *node;
+ struct list_head *next;
+
+ BUG_ON(size > cur->remaining);
+
+ cur->remaining -= size;
+ if (!cur->remaining)
+ return;
+
+ cur->size -= size;
+ if (cur->size) {
+ cur->start += size;
+ return;
+ }
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ block = cur->node;
+
+ next = block->link.next;
+ block = list_entry(next, struct drm_buddy_block, link);
+
+ cur->node = block;
+ cur->start = amdgpu_vram_mgr_block_start(block);
+ cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
+ node = cur->node;
+
+ cur->node = ++node;
+ cur->start = node->start << PAGE_SHIFT;
+ cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+ break;
+ default:
+ return;
+ }
+}
+
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+ struct drm_buddy_block *block;
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ block = cur->node;
+
+ if (!amdgpu_vram_mgr_is_cleared(block))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
new file mode 100644
index 000000000000..28c4ad62f50e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu_reset.h"
+#include "aldebaran.h"
+#include "sienna_cichlid.h"
+#include "smu_v13_0_10.h"
+
+static int amdgpu_reset_xgmi_reset_on_init_suspend(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ /* displays are handled in phase1 */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+
+ /* XXX handle errors */
+ amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ /* The VCN FW shared region is in the framebuffer; some flags in
+ * that region are initialized during sw_init. Make sure the region
+ * is backed up.
+ */
+ amdgpu_vcn_save_vcpu_bo(adev);
+
+ return 0;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_prep_hwctxt(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev;
+ int r;
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ amdgpu_unregister_gpu_instance(tmp_adev);
+ r = amdgpu_reset_xgmi_reset_on_init_suspend(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "xgmi reset on init: prepare for reset failed");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_restore_hwctxt(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r;
+
+ r = amdgpu_device_reinit_after_reset(reset_context);
+ if (r)
+ return r;
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ if (!tmp_adev->kfd.init_complete) {
+ kgd2kfd_init_zone_device(tmp_adev);
+ amdgpu_amdkfd_device_init(tmp_adev);
+ amdgpu_amdkfd_drm_client_create(tmp_adev);
+ }
+ }
+
+ return r;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_perform_reset(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r;
+
+ dev_dbg(adev->dev, "xgmi roi - hw reset\n");
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset =
+ amdgpu_asic_reset_method(adev);
+ }
+ r = 0;
+ /* Mode1 reset needs to be triggered on all devices together */
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "xgmi reset on init: reset failed with error, %d",
+ r);
+ break;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+ }
+
+ return r;
+}
+
+int amdgpu_reset_do_xgmi_reset_on_init(
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *adev;
+ int r;
+
+ if (!reset_device_list || list_empty(reset_device_list) ||
+ list_is_singular(reset_device_list))
+ return -EINVAL;
+
+ adev = list_first_entry(reset_device_list, struct amdgpu_device,
+ reset_list);
+ r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
+ if (r)
+ return r;
+
+ r = amdgpu_reset_perform_reset(adev, reset_context);
+
+ return r;
+}
+
+struct amdgpu_reset_handler xgmi_reset_on_init_handler = {
+ .reset_method = AMD_RESET_METHOD_ON_INIT,
+ .prepare_env = NULL,
+ .prepare_hwcontext = amdgpu_reset_xgmi_reset_on_init_prep_hwctxt,
+ .perform_reset = amdgpu_reset_xgmi_reset_on_init_perform_reset,
+ .restore_hwcontext = amdgpu_reset_xgmi_reset_on_init_restore_hwctxt,
+ .restore_env = NULL,
+ .do_reset = NULL,
+};
+
+int amdgpu_reset_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ ret = aldebaran_reset_init(adev);
+ break;
+ case IP_VERSION(11, 0, 7):
+ ret = sienna_cichlid_reset_init(adev);
+ break;
+ case IP_VERSION(13, 0, 10):
+ ret = smu_v13_0_10_reset_init(adev);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_reset_fini(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ case IP_VERSION(13, 0, 14):
+ ret = aldebaran_reset_fini(adev);
+ break;
+ case IP_VERSION(11, 0, 7):
+ ret = sienna_cichlid_reset_fini(adev);
+ break;
+ case IP_VERSION(13, 0, 10):
+ ret = smu_v13_0_10_reset_fini(adev);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_reset_handler *reset_handler = NULL;
+
+ if (adev->reset_cntl && adev->reset_cntl->get_reset_handler)
+ reset_handler = adev->reset_cntl->get_reset_handler(
+ adev->reset_cntl, reset_context);
+ if (!reset_handler)
+ return -EOPNOTSUPP;
+
+ return reset_handler->prepare_hwcontext(adev->reset_cntl,
+ reset_context);
+}
+
+int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
+{
+ int ret;
+ struct amdgpu_reset_handler *reset_handler = NULL;
+
+ if (adev->reset_cntl)
+ reset_handler = adev->reset_cntl->get_reset_handler(
+ adev->reset_cntl, reset_context);
+ if (!reset_handler)
+ return -EOPNOTSUPP;
+
+ ret = reset_handler->perform_reset(adev->reset_cntl, reset_context);
+ if (ret)
+ return ret;
+
+ return reset_handler->restore_hwcontext(adev->reset_cntl,
+ reset_context);
+}
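Taken together, the two entry points above drive a fixed pipeline: a handler's prepare_hwcontext runs first, then perform_reset, then restore_hwcontext, and a failure at any stage aborts the sequence. A minimal sketch of that shape with an illustrative ops struct (not the driver's amdgpu_reset_handler):

struct reset_ops {
        int (*prepare)(void *ctx);
        int (*perform)(void *ctx);
        int (*restore)(void *ctx);
};

static int run_reset(const struct reset_ops *ops, void *ctx)
{
        int r = ops->prepare(ctx);      /* quiesce and save state */

        if (r)
                return r;
        r = ops->perform(ctx);          /* the actual hardware reset */
        if (r)
                return r;
        return ops->restore(ctx);       /* reinit and restore state */
}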
+
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref)
+{
+ struct amdgpu_reset_domain *reset_domain = container_of(ref,
+ struct amdgpu_reset_domain,
+ refcount);
+ if (reset_domain->wq)
+ destroy_workqueue(reset_domain->wq);
+
+ kvfree(reset_domain);
+}
+
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+ char *wq_name)
+{
+ struct amdgpu_reset_domain *reset_domain;
+
+ reset_domain = kvzalloc(sizeof(struct amdgpu_reset_domain), GFP_KERNEL);
+ if (!reset_domain) {
+ DRM_ERROR("Failed to allocate amdgpu_reset_domain!");
+ return NULL;
+ }
+
+ reset_domain->type = type;
+ kref_init(&reset_domain->refcount);
+
+ reset_domain->wq = create_singlethread_workqueue(wq_name);
+ if (!reset_domain->wq) {
+ DRM_ERROR("Failed to allocate wq for amdgpu_reset_domain!");
+ amdgpu_reset_put_reset_domain(reset_domain);
+ return NULL;
+ }
+
+ atomic_set(&reset_domain->in_gpu_reset, 0);
+ atomic_set(&reset_domain->reset_res, 0);
+ init_rwsem(&reset_domain->sem);
+
+ return reset_domain;
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+ atomic_set(&reset_domain->in_gpu_reset, 1);
+ down_write(&reset_domain->sem);
+}
+
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+ atomic_set(&reset_domain->in_gpu_reset, 0);
+ up_write(&reset_domain->sem);
+}
+
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len)
+{
+ if (!buf || !len)
+ return;
+
+ switch (rst_ctxt->src) {
+ case AMDGPU_RESET_SRC_JOB:
+ if (rst_ctxt->job) {
+ snprintf(buf, len, "job hang on ring:%s",
+ rst_ctxt->job->base.sched->name);
+ } else {
+ strscpy(buf, "job hang", len);
+ }
+ break;
+ case AMDGPU_RESET_SRC_RAS:
+ strscpy(buf, "RAS error", len);
+ break;
+ case AMDGPU_RESET_SRC_MES:
+ strscpy(buf, "MES hang", len);
+ break;
+ case AMDGPU_RESET_SRC_HWS:
+ strscpy(buf, "HWS hang", len);
+ break;
+ case AMDGPU_RESET_SRC_USER:
+ strscpy(buf, "user trigger", len);
+ break;
+ case AMDGPU_RESET_SRC_USERQ:
+ strscpy(buf, "user queue trigger", len);
+ break;
+ default:
+ strscpy(buf, "unknown", len);
+ }
+}
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev)
+{
+ return (adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
new file mode 100644
index 000000000000..07b4d37f1db6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RESET_H__
+#define __AMDGPU_RESET_H__
+
+#include "amdgpu.h"
+
+#define AMDGPU_RESET_MAX_HANDLERS 5
+
+enum AMDGPU_RESET_FLAGS {
+
+ AMDGPU_NEED_FULL_RESET = 0,
+ AMDGPU_SKIP_HW_RESET = 1,
+ AMDGPU_SKIP_COREDUMP = 2,
+ AMDGPU_HOST_FLR = 3,
+};
+
+enum AMDGPU_RESET_SRCS {
+ AMDGPU_RESET_SRC_UNKNOWN,
+ AMDGPU_RESET_SRC_JOB,
+ AMDGPU_RESET_SRC_RAS,
+ AMDGPU_RESET_SRC_MES,
+ AMDGPU_RESET_SRC_HWS,
+ AMDGPU_RESET_SRC_USER,
+ AMDGPU_RESET_SRC_USERQ,
+};
+
+struct amdgpu_reset_context {
+ enum amd_reset_method method;
+ struct amdgpu_device *reset_req_dev;
+ struct amdgpu_job *job;
+ struct amdgpu_hive_info *hive;
+ struct list_head *reset_device_list;
+ unsigned long flags;
+ enum AMDGPU_RESET_SRCS src;
+};
+
+struct amdgpu_reset_handler {
+ enum amd_reset_method reset_method;
+ int (*prepare_env)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*perform_reset)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*restore_hwcontext)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ int (*restore_env)(struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+
+ int (*do_reset)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_reset_control {
+ void *handle;
+ struct work_struct reset_work;
+ struct mutex reset_lock;
+ struct amdgpu_reset_handler *(
+ *reset_handlers)[AMDGPU_RESET_MAX_HANDLERS];
+ atomic_t in_reset;
+ enum amd_reset_method active_reset;
+ struct amdgpu_reset_handler *(*get_reset_handler)(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *context);
+ void (*async_reset)(struct work_struct *work);
+};
+
+
+enum amdgpu_reset_domain_type {
+ SINGLE_DEVICE,
+ XGMI_HIVE
+};
+
+struct amdgpu_reset_domain {
+ struct kref refcount;
+ struct workqueue_struct *wq;
+ enum amdgpu_reset_domain_type type;
+ struct rw_semaphore sem;
+ atomic_t in_gpu_reset;
+ atomic_t reset_res;
+};
+
+int amdgpu_reset_init(struct amdgpu_device *adev);
+int amdgpu_reset_fini(struct amdgpu_device *adev);
+
+int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+int amdgpu_reset_prepare_env(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+int amdgpu_reset_restore_env(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
+
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+ char *wq_name);
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref);
+
+static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *domain)
+{
+ return kref_get_unless_zero(&domain->refcount) != 0;
+}
+
+static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain)
+{
+ if (domain)
+ kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
+}
+
+static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain,
+ struct work_struct *work)
+{
+ return queue_work(domain->wq, work);
+}
+
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain)
+{
+ lockdep_assert_held(&domain->sem);
+ return rwsem_is_contended(&domain->sem);
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len);
+
+#define for_each_handler(i, handler, reset_ctl) \
+ for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
+ (handler = (*reset_ctl->reset_handlers)[i]); \
+ ++i)
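A hypothetical use of the for_each_handler() iterator above, walking the (at most AMDGPU_RESET_MAX_HANDLERS long, effectively NULL-terminated) handler table to find a handler matching a requested method; find_handler() itself is not part of this patch:

static struct amdgpu_reset_handler *
find_handler(struct amdgpu_reset_control *reset_ctl,
             enum amd_reset_method method)
{
        struct amdgpu_reset_handler *handler;
        int i;

        for_each_handler(i, handler, reset_ctl) {
                if (handler->reset_method == method)
                        return handler;
        }
        return NULL;
}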
+
+extern struct amdgpu_reset_handler xgmi_reset_on_init_handler;
+int amdgpu_reset_do_xgmi_reset_on_init(
+ struct amdgpu_reset_context *reset_context);
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev);
+
+static inline void amdgpu_reset_set_dpc_status(struct amdgpu_device *adev,
+ bool status)
+{
+ adev->pcie_reset_ctx.occurs_dpc = status;
+ adev->no_hw_access = status;
+}
+
+static inline bool amdgpu_reset_in_dpc(struct amdgpu_device *adev)
+{
+ return adev->pcie_reset_ctx.occurs_dpc;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
new file mode 100644
index 000000000000..c596b6df2e2d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -0,0 +1,927 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ * Christian König
+ */
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_ras_mgr.h"
+#include "atom.h"
+
+/*
+ * Rings
+ * Most engines on the GPU are fed via ring buffers. Ring
+ * buffers are areas of GPU accessible memory that the host
+ * writes commands into and the GPU reads commands out of.
+ * There is a rptr (read pointer) that determines where the
+ * GPU is currently reading, and a wptr (write pointer)
+ * which determines where the host has written. When the
+ * pointers are equal, the ring is idle. When the host
+ * writes commands to the ring buffer, it increments the
+ * wptr. The GPU then starts fetching commands and executes
+ * them until the pointers are equal again.
+ */
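A toy, standalone model of the scheme described above: a power-of-two ring, free-running read/write pointers masked on access, and an idle condition of rptr == wptr (sizes and names are illustrative):

#include <stdbool.h>
#include <stdint.h>

#define RING_DW  1024u                  /* power of two, in dwords */
#define BUF_MASK (RING_DW - 1)

struct toy_ring {
        uint32_t buf[RING_DW];
        uint64_t rptr, wptr;            /* free-running, masked on access */
};

static bool ring_idle(const struct toy_ring *r)
{
        return r->rptr == r->wptr;      /* pointers equal: nothing to fetch */
}

static void ring_write(struct toy_ring *r, uint32_t dw)
{
        r->buf[r->wptr++ & BUF_MASK] = dw;      /* host writes a command dword */
}

static uint32_t ring_fetch(struct toy_ring *r)
{
        return r->buf[r->rptr++ & BUF_MASK];    /* GPU-side fetch */
}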
+
+/**
+ * amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
+ *
+ * @type: ring type for which to return the limit.
+ */
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
+{
+ switch (type) {
+ case AMDGPU_RING_TYPE_GFX:
+ /* Need to keep at least 192 on GFX7+ for old radv. */
+ return 192;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ return 125;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ return 16;
+ default:
+ return 49;
+ }
+}
+
+/**
+ * amdgpu_ring_alloc - allocate space on the ring buffer
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ndw: number of dwords to allocate in the ring buffer
+ *
+ * Allocate @ndw dwords in the ring buffer (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
+{
+ /* Align the requested size with the padding so that
+ * amdgpu_ring_commit() can pad safely. */
+ ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ /* Make sure we aren't trying to allocate more space
+ * than the maximum for one submission
+ */
+ if (WARN_ON_ONCE(ndw > ring->max_dw))
+ return -ENOMEM;
+
+ ring->count_dw = ndw;
+ ring->wptr_old = ring->wptr;
+
+ if (ring->funcs->begin_use)
+ ring->funcs->begin_use(ring);
+
+ return 0;
+}
+
+/**
+ * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ndw: number of dwords to allocate in the ring buffer
+ *
+ * Allocate @ndw dwords in the ring buffer (all asics).
+ * Doesn't check the max_dw limit, as we may be reemitting
+ * several submissions.
+ */
+static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
+{
+ /* Align the requested size with the padding so that
+ * amdgpu_ring_commit() can pad safely. */
+ ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ ring->count_dw = ndw;
+ ring->wptr_old = ring->wptr;
+
+ if (ring->funcs->begin_use)
+ ring->funcs->begin_use(ring);
+}
+
+/**
+ * amdgpu_ring_insert_nop - insert NOP packets
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @count: the number of NOP packets to insert
+ *
+ * This is the generic insert_nop function for rings except SDMA
+ */
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ uint32_t occupied, chunk1, chunk2;
+
+ occupied = ring->wptr & ring->buf_mask;
+ chunk1 = ring->buf_mask + 1 - occupied;
+ chunk1 = (chunk1 >= count) ? count : chunk1;
+ chunk2 = count - chunk1;
+
+ if (chunk1)
+ memset32(&ring->ring[occupied], ring->funcs->nop, chunk1);
+
+ if (chunk2)
+ memset32(ring->ring, ring->funcs->nop, chunk2);
+
+ ring->wptr += count;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw -= count;
+}
+
+/**
+ * amdgpu_ring_generic_pad_ib - pad IB with NOP packets
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: IB to add NOP packets to
+ *
+ * This is the generic pad_ib function for rings except SDMA
+ */
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ u32 align_mask = ring->funcs->align_mask;
+ u32 count = ib->length_dw & align_mask;
+
+ if (count) {
+ count = align_mask + 1 - count;
+
+ memset32(&ib->ptr[ib->length_dw], ring->funcs->nop, count);
+
+ ib->length_dw += count;
+ }
+}
+
+/**
+ * amdgpu_ring_commit - tell the GPU to execute the new
+ * commands on the ring buffer
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Update the wptr (write pointer) to tell the GPU to
+ * execute new commands on the ring buffer (all asics).
+ */
+void amdgpu_ring_commit(struct amdgpu_ring *ring)
+{
+ uint32_t count;
+
+ if (ring->count_dw < 0)
+ DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+
+ /* We pad to match fetch size */
+ count = ring->funcs->align_mask + 1 -
+ (ring->wptr & ring->funcs->align_mask);
+ count &= ring->funcs->align_mask;
+
+ if (count != 0)
+ ring->funcs->insert_nop(ring, count);
+
+ mb();
+ amdgpu_ring_set_wptr(ring);
+
+ if (ring->funcs->end_use)
+ ring->funcs->end_use(ring);
+}
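The padding computed above rounds the write pointer up to the next fetch boundary; with align_mask = 2^n - 1 the arithmetic reduces to the standalone helper below (illustrative only):

#include <stdint.h>

/* Number of NOP dwords needed to align wptr; 0 if already aligned. */
static uint32_t nops_needed(uint64_t wptr, uint32_t align_mask)
{
        uint32_t count = align_mask + 1 - (uint32_t)(wptr & align_mask);

        return count & align_mask;
}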
+
+/**
+ * amdgpu_ring_undo - reset the wptr
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Reset the driver's copy of the wptr (all asics).
+ */
+void amdgpu_ring_undo(struct amdgpu_ring *ring)
+{
+ ring->wptr = ring->wptr_old;
+
+ if (ring->funcs->end_use)
+ ring->funcs->end_use(ring);
+}
+
+#define amdgpu_ring_get_gpu_addr(ring, offset) \
+ (ring->adev->wb.gpu_addr + offset * 4)
+
+#define amdgpu_ring_get_cpu_addr(ring, offset) \
+ (&ring->adev->wb.wb[offset])
+
+/**
+ * amdgpu_ring_init - init driver ring struct.
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring structure holding ring information
+ * @max_dw: maximum number of dw for ring alloc
+ * @irq_src: interrupt source to use for this ring
+ * @irq_type: interrupt type to use for this ring
+ * @hw_prio: ring priority (NORMAL/HIGH)
+ * @sched_score: optional score atomic shared with other schedulers
+ *
+ * Initialize the driver information for the selected ring (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type, unsigned int hw_prio,
+ atomic_t *sched_score)
+{
+ int r;
+ int sched_hw_submission = amdgpu_sched_hw_submission;
+ u32 *num_sched;
+ u32 hw_ip;
+ unsigned int max_ibs_dw;
+
+ /* Set the hw submission limit higher for KIQ because
+ * it's used for a number of gfx/compute tasks by both
+ * KFD and KGD which may have outstanding fences and
+ * it doesn't really use the gpu scheduler anyway;
+ * KIQ tasks get submitted directly to the ring.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ sched_hw_submission = max(sched_hw_submission, 256);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_MES)
+ sched_hw_submission = 8;
+ else if (ring == &adev->sdma.instance[0].page)
+ sched_hw_submission = 256;
+
+ if (ring->adev == NULL) {
+ if (adev->num_rings >= AMDGPU_MAX_RINGS)
+ return -EINVAL;
+
+ ring->adev = adev;
+ ring->num_hw_submission = sched_hw_submission;
+ ring->sched_score = sched_score;
+ ring->vmid_wait = dma_fence_get_stub();
+
+ ring->idx = adev->num_rings++;
+ adev->rings[ring->idx] = ring;
+
+ r = amdgpu_fence_driver_init_ring(ring);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->fence_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
+ return r;
+ }
+
+ r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
+ return r;
+ }
+
+ ring->fence_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
+ ring->fence_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);
+
+ ring->rptr_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->rptr_offs);
+ ring->rptr_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->rptr_offs);
+
+ ring->wptr_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->wptr_offs);
+ ring->wptr_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->wptr_offs);
+
+ ring->trail_fence_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
+ ring->trail_fence_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);
+
+ ring->cond_exe_gpu_addr =
+ amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
+ ring->cond_exe_cpu_addr =
+ amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);
+
+ /* always set cond_exec_polling to CONTINUE */
+ *ring->cond_exe_cpu_addr = 1;
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
+ if (r) {
+ dev_err(adev->dev, "failed initializing fences (%d).\n", r);
+ return r;
+ }
+
+ max_ibs_dw = ring->funcs->emit_frame_size +
+ amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
+ max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+
+ if (WARN_ON(max_ibs_dw > max_dw))
+ max_dw = max_ibs_dw;
+
+ ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ } else {
+ ring->ring_size = roundup_pow_of_two(max_dw * 4);
+ ring->count_dw = (ring->ring_size - 4) >> 2;
+ /* ring buffer is empty now */
+ ring->wptr = *ring->rptr_cpu_addr = 0;
+ }
+
+ ring->buf_mask = (ring->ring_size / 4) - 1;
+ ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
+ 0xffffffffffffffff : ring->buf_mask;
+ /* Initialize cached_rptr to 0 */
+ ring->cached_rptr = 0;
+
+ if (!ring->ring_backup) {
+ ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL);
+ if (!ring->ring_backup)
+ return -ENOMEM;
+ }
+
+ /* Allocate ring buffer */
+ if (ring->ring_obj == NULL) {
+ r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_bytes,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ring->ring_obj,
+ &ring->gpu_addr,
+ (void **)&ring->ring);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring create failed\n", r);
+ kvfree(ring->ring_backup);
+ return r;
+ }
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ ring->max_dw = max_dw;
+ ring->hw_prio = hw_prio;
+
+ if (!ring->no_scheduler && ring->funcs->type < AMDGPU_HW_IP_NUM) {
+ hw_ip = ring->funcs->type;
+ num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
+ adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
+ &ring->sched;
+ }
+
+ return 0;
+}
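The sizing step above rounds the ring up to a power of two precisely so that buf_mask = ring_size / 4 - 1 can wrap indices with a single AND. A standalone restatement of that computation (helper names are made up):

#include <stdint.h>

static uint64_t roundup_pow2_u64(uint64_t x)
{
        uint64_t p = 1;

        while (p < x)
                p <<= 1;
        return p;
}

/* Ring size in bytes: max_dw dwords per submission, hw_submissions deep. */
static uint64_t ring_bytes(uint32_t max_dw, uint32_t hw_submissions)
{
        return roundup_pow2_u64((uint64_t)max_dw * 4 * hw_submissions);
}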
+
+/**
+ * amdgpu_ring_fini - tear down the driver ring struct.
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Tear down the driver information for the selected ring (all asics).
+ */
+void amdgpu_ring_fini(struct amdgpu_ring *ring)
+{
+
+ /* Don't tear down a ring that was never initialized */
+ if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
+ return;
+
+ ring->sched.ready = false;
+
+ amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
+ amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
+
+ amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
+ amdgpu_device_wb_free(ring->adev, ring->fence_offs);
+
+ amdgpu_bo_free_kernel(&ring->ring_obj,
+ &ring->gpu_addr,
+ (void **)&ring->ring);
+ kvfree(ring->ring_backup);
+ ring->ring_backup = NULL;
+
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = NULL;
+ ring->me = 0;
+}
+
+/**
+ * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
+ *
+ * @ring: ring to write to
+ * @reg0: register to write
+ * @reg1: register to wait on
+ * @ref: reference value to write/wait on
+ * @mask: mask to wait on
+ *
+ * Helper for rings that don't support write and wait in a
+ * single oneshot packet.
+ */
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
+/**
+ * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
+ *
+ * @ring: ring to try the recovery on
+ * @vmid: VMID we try to get going again
+ * @fence: timedout fence
+ *
+ * Tries to get a ring proceeding again when it is stuck.
+ */
+bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+ struct dma_fence *fence)
+{
+ unsigned long flags;
+ ktime_t deadline;
+ bool ret;
+
+ deadline = ktime_add_us(ktime_get(), 10000);
+
+ if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
+ return false;
+
+ spin_lock_irqsave(fence->lock, flags);
+ if (!dma_fence_is_signaled_locked(fence))
+ dma_fence_set_error(fence, -ENODATA);
+ spin_unlock_irqrestore(fence->lock, flags);
+
+ while (!dma_fence_is_signaled(fence) &&
+ ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
+ ring->funcs->soft_recovery(ring, vmid);
+
+ ret = dma_fence_is_signaled(fence);
+ /* increment the counter only if soft reset worked */
+ if (ret)
+ atomic_inc(&ring->adev->gpu_reset_counter);
+
+ return ret;
+}
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *offset)
+{
+ const uint8_t ring_header_size = 12;
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ struct ras_cmd_cper_snapshot_req *snapshot_req __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_snapshot_req), GFP_KERNEL);
+ struct ras_cmd_cper_snapshot_rsp *snapshot_rsp __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_snapshot_rsp), GFP_KERNEL);
+ struct ras_cmd_cper_record_req *record_req __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_record_req), GFP_KERNEL);
+ struct ras_cmd_cper_record_rsp *record_rsp __free(kfree) =
+ kzalloc(sizeof(struct ras_cmd_cper_record_rsp), GFP_KERNEL);
+ uint8_t *ring_header __free(kfree) =
+ kzalloc(ring_header_size, GFP_KERNEL);
+ uint32_t total_cper_num;
+ uint64_t start_cper_id;
+ int r;
+
+ if (!snapshot_req || !snapshot_rsp || !record_req || !record_rsp ||
+ !ring_header)
+ return -ENOMEM;
+
+ if (!(*offset)) {
+ /* Need at least 12 bytes for the header on the first read */
+ if (size < ring_header_size)
+ return -EINVAL;
+
+ if (copy_to_user(buf, ring_header, ring_header_size))
+ return -EFAULT;
+ buf += ring_header_size;
+ size -= ring_header_size;
+ }
+
+ r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
+ RAS_CMD__GET_CPER_SNAPSHOT,
+ snapshot_req, sizeof(struct ras_cmd_cper_snapshot_req),
+ snapshot_rsp, sizeof(struct ras_cmd_cper_snapshot_rsp));
+ if (r || !snapshot_rsp->total_cper_num)
+ return r;
+
+ start_cper_id = snapshot_rsp->start_cper_id;
+ total_cper_num = snapshot_rsp->total_cper_num;
+
+ record_req->buf_ptr = (uint64_t)(uintptr_t)buf;
+ record_req->buf_size = size;
+ record_req->cper_start_id = start_cper_id + *offset;
+ record_req->cper_num = total_cper_num;
+ r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD,
+ record_req, sizeof(struct ras_cmd_cper_record_req),
+ record_rsp, sizeof(struct ras_cmd_cper_record_rsp));
+ if (r)
+ return r;
+
+ r = *offset ? record_rsp->real_data_size : record_rsp->real_data_size + ring_header_size;
+ (*offset) += record_rsp->real_cper_num;
+
+ return r;
+}
+
+/* The file layout is a 12-byte header consisting of
+ * - rptr
+ * - wptr
+ * - driver's copy of wptr
+ *
+ * followed by n words of ring data.
+ */
+static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ uint32_t value, result, early[3];
+ uint64_t p;
+ loff_t i;
+ int r;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && amdgpu_uniras_enabled(ring->adev))
+ return amdgpu_ras_cper_debugfs_read(f, buf, size, pos);
+
+ if (*pos & 3 || size & 3)
+ return -EINVAL;
+
+ result = 0;
+
+ if (*pos < 12) {
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_lock(&ring->adev->cper.ring_lock);
+
+ early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
+ early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
+ early[2] = ring->wptr & ring->buf_mask;
+ for (i = *pos / 4; i < 3 && size; i++) {
+ r = put_user(early[i], (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto out;
+ }
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+ }
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ while (size) {
+ if (*pos >= (ring->ring_size + 12))
+ return result;
+
+ value = ring->ring[(*pos - 12)/4];
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ return r;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+ } else {
+ p = early[0];
+ if (early[0] <= early[1])
+ size = (early[1] - early[0]);
+ else
+ size = ring->ring_size - (early[0] - early[1]);
+
+ while (size) {
+ if (p == early[1])
+ goto out;
+
+ value = ring->ring[p];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto out;
+ }
+
+ buf += 4;
+ result += 4;
+ size--;
+ p++;
+ p &= ring->ptr_mask;
+ }
+ }
+
+out:
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_unlock(&ring->adev->cper.ring_lock);
+
+ return result;
+}
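From userspace the same layout can be consumed by reading the first 12 bytes (rptr, wptr, driver's wptr) from the ring's debugfs file. A small sketch; the debugfs path and ring name below are assumptions that differ per system, and reading debugfs normally requires root:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Assumed path: DRM debugfs root for card 0 plus a ring name. */
        const char *path = "/sys/kernel/debug/dri/0/amdgpu_ring_gfx";
        uint32_t hdr[3];
        FILE *f = fopen(path, "rb");

        if (!f)
                return 1;
        if (fread(hdr, sizeof(hdr[0]), 3, f) == 3)
                printf("rptr=0x%x wptr=0x%x driver_wptr=0x%x\n",
                       (unsigned)hdr[0], (unsigned)hdr[1], (unsigned)hdr[2]);
        fclose(f);
        return 0;
}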
+
+static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+
+ if (*pos & 3 || size & 3)
+ return -EINVAL;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ amdgpu_virt_req_ras_cper_dump(ring->adev, false);
+
+ return amdgpu_debugfs_ring_read(f, buf, size, pos);
+}
+
+static const struct file_operations amdgpu_debugfs_ring_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_ring_read,
+ .llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_virt_ring_read,
+ .llseek = default_llseek
+};
+
+static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ ssize_t bytes = min_t(ssize_t, ring->mqd_size - *pos, size);
+ void *from = ((u8 *)ring->mqd_ptr) + *pos;
+
+ if (*pos > ring->mqd_size)
+ return 0;
+
+ if (copy_to_user(buf, from, bytes))
+ return -EFAULT;
+
+ *pos += bytes;
+ return bytes;
+}
+
+static const struct file_operations amdgpu_debugfs_mqd_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_mqd_read,
+ .llseek = default_llseek
+};
+
+static int amdgpu_debugfs_ring_error(void *data, u64 val)
+{
+ struct amdgpu_ring *ring = data;
+
+ amdgpu_fence_driver_set_error(ring, val);
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL,
+ amdgpu_debugfs_ring_error, "%lld\n");
+
+#endif
+
+void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ sprintf(name, "amdgpu_ring_%s", ring->name);
+ if (amdgpu_sriov_vf(adev))
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_virt_ring_fops,
+ ring->ring_size + 12);
+ else
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_ring_fops,
+ ring->ring_size + 12);
+
+ if (ring->mqd_obj) {
+ sprintf(name, "amdgpu_mqd_%s", ring->name);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_mqd_fops,
+ ring->mqd_size);
+ }
+
+ sprintf(name, "amdgpu_error_%s", ring->name);
+ debugfs_create_file(name, 0200, root, ring,
+ &amdgpu_debugfs_error_fops);
+
+#endif
+}
+
+/**
+ * amdgpu_ring_test_helper - test the ring and set the scheduler readiness status
+ *
+ * @ring: ring to test
+ *
+ * Tests the ring and sets the scheduler readiness status accordingly.
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
+ ring->name, r);
+ else
+ DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
+ ring->name);
+
+ ring->sched.ready = !r;
+
+ return r;
+}
+
+static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+ amdgpu_gfx_is_high_priority_compute_queue(adev, ring);
+ bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+ amdgpu_gfx_is_high_priority_graphics_queue(adev, ring);
+
+ memset(prop, 0, sizeof(*prop));
+
+ prop->mqd_gpu_addr = ring->mqd_gpu_addr;
+ prop->hqd_base_gpu_addr = ring->gpu_addr;
+ prop->rptr_gpu_addr = ring->rptr_gpu_addr;
+ prop->wptr_gpu_addr = ring->wptr_gpu_addr;
+ prop->queue_size = ring->ring_size;
+ prop->eop_gpu_addr = ring->eop_gpu_addr;
+ prop->use_doorbell = ring->use_doorbell;
+ prop->doorbell_index = ring->doorbell_index;
+ prop->kernel_queue = true;
+
+ /* The map_queues packet doesn't need to activate the queue,
+ * so only the KIQ needs this field set.
+ */
+ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
+
+ prop->allow_tunneling = is_high_prio_compute;
+ if (is_high_prio_compute || is_high_prio_gfx) {
+ prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ }
+}
+
+int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_mqd *mqd_mgr;
+ struct amdgpu_mqd_prop prop;
+
+ amdgpu_ring_to_mqd_prop(ring, &prop);
+
+ ring->wptr = 0;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ mqd_mgr = &adev->mqds[AMDGPU_HW_IP_COMPUTE];
+ else
+ mqd_mgr = &adev->mqds[ring->funcs->type];
+
+ return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
+}
+
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_begin(ring);
+}
+
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_end(ring);
+}
+
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
+}
+
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
+}
+
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
+{
+ if (ring->is_sw_ring)
+ amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
+}
+
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
+{
+ if (!ring)
+ return false;
+
+ if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
+ return false;
+
+ return true;
+}
+
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ /* Stop the scheduler to prevent anybody else from touching the ring buffer. */
+ drm_sched_wqueue_stop(&ring->sched);
+ /* back up the non-guilty commands */
+ amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);
+}
+
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence)
+{
+ unsigned int i;
+ int r;
+
+ /* verify that the ring is functional */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+
+ /* signal the guilty fence and set an error on all fences from the context */
+ if (guilty_fence)
+ amdgpu_fence_driver_guilty_force_completion(guilty_fence);
+ /* Re-emit the non-guilty commands */
+ if (ring->ring_backup_entries_to_copy) {
+ amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
+ for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
+ amdgpu_ring_write(ring, ring->ring_backup[i]);
+ amdgpu_ring_commit(ring);
+ }
+ /* Start the scheduler again */
+ drm_sched_wqueue_start(&ring->sched);
+ return 0;
+}
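+
+/*
+ * A minimal sketch of how an IP block's ->reset callback is expected to wrap
+ * the two helpers above; foo_stop_queue()/foo_start_queue() are hypothetical
+ * IP specific steps, not part of this file:
+ *
+ *	static int foo_ring_reset(struct amdgpu_ring *ring, unsigned int vmid,
+ *				  struct amdgpu_fence *timedout_fence)
+ *	{
+ *		amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+ *		foo_stop_queue(ring);
+ *		foo_start_queue(ring);
+ *		return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+ *	}
+ */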
+
+bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
+ u32 reset_type)
+{
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ if (ring->adev->gfx.gfx_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ if (ring->adev->gfx.compute_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ if (ring->adev->sdma.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_DEC:
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ if (ring->adev->vcn.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ if (ring->adev->jpeg.supported_reset & reset_type)
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
new file mode 100644
index 000000000000..7a27c6c4bb44
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -0,0 +1,580 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_RING_H__
+#define __AMDGPU_RING_H__
+
+#include <drm/amdgpu_drm.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_print.h>
+#include <drm/drm_suballoc.h>
+
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_ib;
+struct amdgpu_cs_parser;
+struct amdgpu_job;
+struct amdgpu_vm;
+
+/* max number of rings */
+#define AMDGPU_MAX_RINGS 149
+#define AMDGPU_MAX_HWIP_RINGS 64
+#define AMDGPU_MAX_GFX_RINGS 2
+#define AMDGPU_MAX_SW_GFX_RINGS 2
+#define AMDGPU_MAX_COMPUTE_RINGS 8
+#define AMDGPU_MAX_VCE_RINGS 3
+#define AMDGPU_MAX_UVD_ENC_RINGS 2
+#define AMDGPU_MAX_VPE_RINGS 2
+
+enum amdgpu_ring_priority_level {
+ AMDGPU_RING_PRIO_0,
+ AMDGPU_RING_PRIO_1,
+ AMDGPU_RING_PRIO_DEFAULT = 1,
+ AMDGPU_RING_PRIO_2,
+ AMDGPU_RING_PRIO_MAX
+};
+
+/* some special values for the owner field */
+#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
+#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
+#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
+
+#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
+#define AMDGPU_FENCE_FLAG_INT (1 << 1)
+#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC (1 << 3)
+
+#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
+
+#define AMDGPU_IB_POOL_SIZE (1024 * 1024)
+
+enum amdgpu_ring_type {
+ AMDGPU_RING_TYPE_GFX = AMDGPU_HW_IP_GFX,
+ AMDGPU_RING_TYPE_COMPUTE = AMDGPU_HW_IP_COMPUTE,
+ AMDGPU_RING_TYPE_SDMA = AMDGPU_HW_IP_DMA,
+ AMDGPU_RING_TYPE_UVD = AMDGPU_HW_IP_UVD,
+ AMDGPU_RING_TYPE_VCE = AMDGPU_HW_IP_VCE,
+ AMDGPU_RING_TYPE_UVD_ENC = AMDGPU_HW_IP_UVD_ENC,
+ AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC,
+ AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC,
+ AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG,
+ AMDGPU_RING_TYPE_VPE = AMDGPU_HW_IP_VPE,
+ AMDGPU_RING_TYPE_KIQ,
+ AMDGPU_RING_TYPE_MES,
+ AMDGPU_RING_TYPE_UMSCH_MM,
+ AMDGPU_RING_TYPE_CPER,
+ AMDGPU_RING_TYPE_MAX,
+};
+
+enum amdgpu_ib_pool_type {
+ /* Normal submissions to the top of the pipeline. */
+ AMDGPU_IB_POOL_DELAYED,
+ /* Immediate submissions to the bottom of the pipeline. */
+ AMDGPU_IB_POOL_IMMEDIATE,
+ /* Direct submission to the ring buffer during init and reset. */
+ AMDGPU_IB_POOL_DIRECT,
+
+ AMDGPU_IB_POOL_MAX
+};
+
+struct amdgpu_ib {
+ struct drm_suballoc *sa_bo;
+ uint32_t length_dw;
+ uint64_t gpu_addr;
+ uint32_t *ptr;
+ uint32_t flags;
+};
+
+struct amdgpu_sched {
+ u32 num_scheds;
+ struct drm_gpu_scheduler *sched[AMDGPU_MAX_HWIP_RINGS];
+};
+
+/*
+ * Fences.
+ */
+struct amdgpu_fence_driver {
+ uint64_t gpu_addr;
+ uint32_t *cpu_addr;
+ /* sync_seq is protected by ring emission lock */
+ uint32_t sync_seq;
+ atomic_t last_seq;
+ u64 signalled_wptr;
+ bool initialized;
+ struct amdgpu_irq_src *irq_src;
+ unsigned irq_type;
+ struct timer_list fallback_timer;
+ unsigned num_fences_mask;
+ spinlock_t lock;
+ struct dma_fence **fences;
+};
+
+/*
+ * Fences mark an event in the GPUs pipeline and are used
+ * for GPU/CPU synchronization. When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the relevant GPU caches have been flushed.
+ */
+
+struct amdgpu_fence {
+ struct dma_fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+ ktime_t start_timestamp;
+
+ /* wptr for the fence for resets */
+ u64 wptr;
+ /* fence context for resets */
+ u64 context;
+};
+
+extern const struct drm_sched_backend_ops amdgpu_sched_ops;
+
+void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
+void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af);
+void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
+
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
+int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq_src,
+ unsigned irq_type);
+void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
+void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
+int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
+void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
+ unsigned int flags);
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout);
+bool amdgpu_fence_process(struct amdgpu_ring *ring);
+int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
+signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
+ uint32_t wait_seq,
+ signed long timeout);
+unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
+
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
+ ktime_t timestamp);
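+
+/*
+ * A minimal polling sketch for contexts where fence interrupts are not
+ * usable (timeout_us is a caller-chosen value, not defined here): emit a
+ * polled sequence number on the ring and busy-wait for it to signal.
+ *
+ *	uint32_t seq;
+ *
+ *	r = amdgpu_fence_emit_polling(ring, &seq, timeout_us);
+ *	if (!r)
+ *		amdgpu_fence_wait_polling(ring, seq, timeout_us);
+ */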
+
+/*
+ * Rings.
+ */
+
+/* provided by hw blocks that expose a ring buffer for commands */
+struct amdgpu_ring_funcs {
+ /**
+ * @type:
+ *
+ * GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER
+ * use ring buffers. The type field just identifies which component the
+ * ring buffer is associated with.
+ */
+ enum amdgpu_ring_type type;
+ uint32_t align_mask;
+
+ /**
+ * @nop:
+ *
+ * Every block in the amdgpu has no-op instructions (e.g., GFX 10
+ * uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP,
+ * etc). This field receives the specific no-op for the component
+ * that initializes the ring.
+ */
+ u32 nop;
+ bool support_64bit_ptrs;
+ bool no_user_fence;
+ bool secure_submission_supported;
+
+ /**
+ * @extra_bytes:
+ *
+ * Optional extra space in bytes that is added to the ring size
+ * when allocating the BO that holds the contents of the ring.
+ * This space isn't used for command submission to the ring,
+ * but is just there to satisfy some hardware requirements or
+ * implement workarounds. It's up to the implementation of each
+ * specific ring to initialize this space.
+ */
+ unsigned extra_bytes;
+
+ /* ring read/write ptr handling */
+ u64 (*get_rptr)(struct amdgpu_ring *ring);
+ u64 (*get_wptr)(struct amdgpu_ring *ring);
+ void (*set_wptr)(struct amdgpu_ring *ring);
+ /* validating and patching of IBs */
+ int (*parse_cs)(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+ int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+ /* constants to calculate how many DW are needed for an emit */
+ unsigned emit_frame_size;
+ unsigned emit_ib_size;
+ /* command emit functions */
+ void (*emit_ib)(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags);
+ void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
+ uint64_t seq, unsigned flags);
+ void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
+ void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
+ uint64_t pd_addr);
+ void (*emit_hdp_flush)(struct amdgpu_ring *ring);
+ void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size);
+ /* testing functions */
+ int (*test_ring)(struct amdgpu_ring *ring);
+ int (*test_ib)(struct amdgpu_ring *ring, long timeout);
+ /* insert NOP packets */
+ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+ void (*insert_start)(struct amdgpu_ring *ring);
+ void (*insert_end)(struct amdgpu_ring *ring);
+ /* pad the indirect buffer to the necessary number of dw */
+ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+ unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr);
+ /* note usage for clock and power gating */
+ void (*begin_use)(struct amdgpu_ring *ring);
+ void (*end_use)(struct amdgpu_ring *ring);
+ void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+ void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+ void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow, int vmid);
+ void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs);
+ void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+ void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+ void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
+ void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+ bool secure);
+ /* Try to soft recover the ring to make the fence signal */
+ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
+ int (*preempt_ib)(struct amdgpu_ring *ring);
+ void (*emit_mem_sync)(struct amdgpu_ring *ring);
+ void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
+ void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset);
+ void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
+ int (*reset)(struct amdgpu_ring *ring, unsigned int vmid,
+ struct amdgpu_fence *timedout_fence);
+ void (*emit_cleaner_shader)(struct amdgpu_ring *ring);
+};
+
+/**
+ * struct amdgpu_ring - Holds ring information
+ */
+struct amdgpu_ring {
+ struct amdgpu_device *adev;
+ const struct amdgpu_ring_funcs *funcs;
+ struct amdgpu_fence_driver fence_drv;
+ struct drm_gpu_scheduler sched;
+
+ struct amdgpu_bo *ring_obj;
+ uint32_t *ring;
+ /* backups for resets */
+ uint32_t *ring_backup;
+ unsigned int ring_backup_entries_to_copy;
+ unsigned rptr_offs;
+ u64 rptr_gpu_addr;
+ u32 *rptr_cpu_addr;
+
+ /**
+ * @wptr:
+ *
+ * This is part of the Ring buffer implementation and represents the
+ * write pointer. The wptr determines where the host has written.
+ */
+ u64 wptr;
+
+ /**
+ * @wptr_old:
+ *
+ * The previous value of wptr, saved before wptr is updated so that a
+ * partial emission can be undone.
+ */
+ u64 wptr_old;
+ unsigned ring_size;
+
+ /**
+ * @max_dw:
+ *
+ * Maximum number of DWords for ring allocation. This information is
+ * provided at the ring initialization time, and each IP block can
+ * specify a specific value. Check places that invoke
+ * amdgpu_ring_init() to see the maximum size per block.
+ */
+ unsigned max_dw;
+
+ /**
+ * @count_dw:
+ *
+ * Number of DWords available for the current emission. It is set when
+ * ring space is reserved and decremented as the ring is written.
+ */
+ int count_dw;
+ uint64_t gpu_addr;
+
+ /**
+ * @ptr_mask:
+ *
+ * Some IPs support 64-bit pointers and others only 32-bit ones; this
+ * behavior is component-specific and selected by support_64bit_ptrs.
+ * If the IP block supports 64-bit pointers, the mask is set to
+ * 0xffffffffffffffff; otherwise it falls back to buf_mask. This field
+ * is used to keep wptr within a valid range.
+ */
+ uint64_t ptr_mask;
+
+ /**
+ * @buf_mask:
+ *
+ * Mask used to wrap wptr to a valid index within the ring buffer. It is
+ * set at ring initialization time and is defined as (ring_size / 4) - 1.
+ */
+ uint32_t buf_mask;
+ u32 idx;
+ u32 xcc_id;
+ u32 xcp_id;
+ u32 me;
+ u32 pipe;
+ u32 queue;
+ struct amdgpu_bo *mqd_obj;
+ uint64_t mqd_gpu_addr;
+ void *mqd_ptr;
+ unsigned mqd_size;
+ uint64_t eop_gpu_addr;
+ u32 doorbell_index;
+ bool use_doorbell;
+ bool use_pollmem;
+ unsigned wptr_offs;
+ u64 wptr_gpu_addr;
+
+ /**
+ * @wptr_cpu_addr:
+ *
+ * CPU pointer to the wptr writeback slot, used to publish wptr updates
+ * to the GPU.
+ */
+ u32 *wptr_cpu_addr;
+ unsigned fence_offs;
+ u64 fence_gpu_addr;
+ u32 *fence_cpu_addr;
+ uint64_t current_ctx;
+ char name[16];
+ u32 trail_seq;
+ unsigned trail_fence_offs;
+ u64 trail_fence_gpu_addr;
+ u32 *trail_fence_cpu_addr;
+ unsigned cond_exe_offs;
+ u64 cond_exe_gpu_addr;
+ u32 *cond_exe_cpu_addr;
+ unsigned int set_q_mode_offs;
+ u32 *set_q_mode_ptr;
+ u64 set_q_mode_token;
+ unsigned vm_hub;
+ unsigned vm_inv_eng;
+ struct dma_fence *vmid_wait;
+ bool has_compute_vm_bug;
+ bool no_scheduler;
+ bool no_user_submission;
+ int hw_prio;
+ unsigned num_hw_submission;
+ atomic_t *sched_score;
+
+ bool is_sw_ring;
+ unsigned int entry_index;
+ /* store the cached rptr to restore after reset */
+ uint64_t cached_rptr;
+};
+
+#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
+#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
+#define amdgpu_ring_test_ib(r, t) ((r)->funcs->test_ib ? (r)->funcs->test_ib((r), (t)) : 0)
+#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
+#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
+#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
+#define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags)))
+#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
+#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
+#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
+#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
+#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)))
+#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
+#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
+#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
+#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a))
+#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
+#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
+#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
+#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o)))
+#define amdgpu_ring_reset(r, v, f) (r)->funcs->reset((r), (v), (f))
+
+unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type);
+int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
+
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+void amdgpu_ring_commit(struct amdgpu_ring *ring);
+void amdgpu_ring_undo(struct amdgpu_ring *ring);
+int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
+ unsigned int max_dw, struct amdgpu_irq_src *irq_src,
+ unsigned int irq_type, unsigned int hw_prio,
+ atomic_t *sched_score);
+void amdgpu_ring_fini(struct amdgpu_ring *ring);
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t val0,
+ uint32_t reg1, uint32_t val1);
+bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+ struct dma_fence *fence);
+
+static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
+ bool cond_exec)
+{
+ *ring->cond_exe_cpu_addr = cond_exec;
+}
+
+static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
+{
+ memset32(ring->ring, ring->funcs->nop, ring->buf_mask + 1);
+}
+
+static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
+{
+ ring->ring[ring->wptr++ & ring->buf_mask] = v;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw--;
+}
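+
+/*
+ * A minimal emission sketch using the helpers in this header: reserve space
+ * on the ring, write the dwords, then commit so the new wptr is handed to
+ * the hardware. The two NOPs are only an example payload.
+ *
+ *	if (!amdgpu_ring_alloc(ring, 2)) {
+ *		amdgpu_ring_write(ring, ring->funcs->nop);
+ *		amdgpu_ring_write(ring, ring->funcs->nop);
+ *		amdgpu_ring_commit(ring);
+ *	}
+ */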
+
+static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
+ void *src, int count_dw)
+{
+ unsigned occupied, chunk1, chunk2;
+
+ occupied = ring->wptr & ring->buf_mask;
+ chunk1 = ring->buf_mask + 1 - occupied;
+ chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
+ chunk2 = count_dw - chunk1;
+ chunk1 <<= 2;
+ chunk2 <<= 2;
+
+ if (chunk1)
+ memcpy(&ring->ring[occupied], src, chunk1);
+
+ if (chunk2) {
+ src += chunk1;
+ memcpy(ring->ring, src, chunk2);
+ }
+
+ ring->wptr += count_dw;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw -= count_dw;
+}
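+
+/*
+ * A worked example of the wrap-around above, with hypothetical values: with
+ * buf_mask = 0xff and wptr & buf_mask = 0xfe, writing count_dw = 4 copies
+ * chunk1 = 2 dwords to the tail of the ring and chunk2 = 2 dwords to the
+ * head, then advances wptr by 4.
+ */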
+
+/**
+ * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
+ * @ring: amdgpu_ring structure
+ * @offset: offset returned by amdgpu_ring_init_cond_exec
+ *
+ * Calculate the dw count and patch it into a cond_exec command.
+ */
+static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned int offset)
+{
+ unsigned cur;
+
+ if (!ring->funcs->init_cond_exec)
+ return;
+
+ WARN_ON(offset > ring->buf_mask);
+ WARN_ON(ring->ring[offset] != 0);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur < offset)
+ cur += ring->ring_size >> 2;
+ ring->ring[offset] = cur - offset;
+}
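+
+/*
+ * A minimal pairing sketch for the conditional execute helpers; cond_addr is
+ * a hypothetical GPU address owned by the caller:
+ *
+ *	unsigned int offs = amdgpu_ring_init_cond_exec(ring, cond_addr);
+ *
+ *	... emit the packets guarded by the COND_EXEC ...
+ *	amdgpu_ring_patch_cond_exec(ring, offs);
+ */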
+
+int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
+
+void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+
+int amdgpu_ring_init_mqd(struct amdgpu_ring *ring);
+
+static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
+{
+ return ib->ptr[idx];
+}
+
+static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
+ uint32_t value)
+{
+ ib->ptr[idx] = value;
+}
+
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ unsigned size,
+ enum amdgpu_ib_pool_type pool,
+ struct amdgpu_ib *ib);
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ struct amdgpu_ib *ibs, struct amdgpu_job *job,
+ struct dma_fence **f);
+int amdgpu_ib_pool_init(struct amdgpu_device *adev);
+void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
+int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
+void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
+ struct amdgpu_fence *guilty_fence);
+bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring,
+ u32 reset_type);
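+
+/*
+ * A minimal IB lifecycle sketch using the declarations above (error handling
+ * elided; the direct pool is chosen because no job is attached):
+ *
+ *	struct amdgpu_ib ib = {};
+ *	struct dma_fence *f = NULL;
+ *
+ *	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ *	if (!r) {
+ *		ib.ptr[ib.length_dw++] = ring->funcs->nop;
+ *		r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ *		amdgpu_ib_free(&ib, f);
+ *		dma_fence_put(f);
+ *	}
+ */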
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..7e7d6c3865bc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,576 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/slab.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
+#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
+
+static const struct ring_info {
+ unsigned int hw_pio;
+ const char *ring_name;
+} sw_ring_info[] = {
+ { AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+ { AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
+static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring)
+{
+ return ring->entry_index < mux->ring_entry_size ?
+ &mux->ring_entry[ring->entry_index] : NULL;
+}
+
+/* copy packets from the sw ring range [s_start, s_end) */
+static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring,
+ u64 s_start, u64 s_end)
+{
+ u64 start, end;
+ struct amdgpu_ring *real_ring = mux->real_ring;
+
+ start = s_start & ring->buf_mask;
+ end = s_end & ring->buf_mask;
+
+ if (start == end) {
+ DRM_ERROR("no more data copied from sw ring\n");
+ return;
+ }
+ if (start > end) {
+ amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start],
+ (ring->ring_size >> 2) - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
+ } else {
+ amdgpu_ring_alloc(real_ring, end - start);
+ amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start);
+ }
+}
+
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e = NULL;
+ struct amdgpu_mux_chunk *chunk;
+ uint32_t seq, last_seq;
+ int i;
+
+ /* find a low priority entry */
+ if (!mux->s_resubmit)
+ return;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ e = &mux->ring_entry[i];
+ break;
+ }
+ }
+
+ if (!e) {
+ DRM_ERROR("%s no low priority ring found\n", __func__);
+ return;
+ }
+
+ last_seq = atomic_read(&e->ring->fence_drv.last_seq);
+ seq = mux->seqno_to_resubmit;
+ if (last_seq < seq) {
+ /* resubmit all the fences between (last_seq, seq] */
+ list_for_each_entry(chunk, &e->list, entry) {
+ if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
+ amdgpu_fence_update_start_timestamp(e->ring,
+ chunk->sync_seq,
+ ktime_get());
+ if (chunk->sync_seq ==
+ le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
+ if (chunk->cntl_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_cntl(e->ring,
+ chunk->cntl_offset);
+ if (chunk->ce_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
+ if (chunk->de_offset <= e->ring->buf_mask)
+ amdgpu_ring_patch_de(e->ring, chunk->de_offset);
+ }
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
+ chunk->start,
+ chunk->end);
+ mux->wptr_resubmit = chunk->end;
+ amdgpu_ring_commit(mux->real_ring);
+ }
+ }
+ }
+
+ timer_delete(&mux->resubmit_timer);
+ mux->s_resubmit = false;
+}
+
+static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
+{
+ mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
+}
+
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
+{
+ struct amdgpu_ring_mux *mux = timer_container_of(mux, t,
+ resubmit_timer);
+
+ if (!spin_trylock(&mux->lock)) {
+ amdgpu_ring_mux_schedule_resubmit(mux);
+ DRM_ERROR("reschedule resubmit\n");
+ return;
+ }
+ amdgpu_mux_resubmit_chunks(mux);
+ spin_unlock(&mux->lock);
+}
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size)
+{
+ mux->real_ring = ring;
+ mux->num_ring_entries = 0;
+
+ mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
+ if (!mux->ring_entry)
+ return -ENOMEM;
+
+ mux->ring_entry_size = entry_size;
+ mux->s_resubmit = false;
+
+ amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN);
+ if (!amdgpu_mux_chunk_slab) {
+ DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&mux->lock);
+ timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
+
+ return 0;
+}
+
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk, *chunk2;
+ int i;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ e = &mux->ring_entry[i];
+ list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
+ list_del(&chunk->entry);
+ kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+ }
+ }
+ kmem_cache_destroy(amdgpu_mux_chunk_slab);
+ kfree(mux->ring_entry);
+ mux->ring_entry = NULL;
+ mux->num_ring_entries = 0;
+ mux->ring_entry_size = 0;
+}
+
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+
+ if (mux->num_ring_entries >= mux->ring_entry_size) {
+ DRM_ERROR("add sw ring exceeding max entry size\n");
+ return -ENOENT;
+ }
+
+ e = &mux->ring_entry[mux->num_ring_entries];
+ ring->entry_index = mux->num_ring_entries;
+ e->ring = ring;
+
+ INIT_LIST_HEAD(&e->list);
+ mux->num_ring_entries += 1;
+ return 0;
+}
+
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
+{
+ struct amdgpu_mux_entry *e;
+
+ spin_lock(&mux->lock);
+
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
+ amdgpu_mux_resubmit_chunks(mux);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ /* Skip this wptr update while a preemption is in progress. */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
+ spin_unlock(&mux->lock);
+ return;
+ }
+
+ e->sw_cptr = e->sw_wptr;
+ /* Update cptr if the packets were already copied by the resubmit path */
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
+ e->sw_cptr = mux->wptr_resubmit;
+ e->sw_wptr = wptr;
+ e->start_ptr_in_hw_ring = mux->real_ring->wptr;
+
+ /* Skip copying packets that were already resubmitted. */
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
+ amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ amdgpu_ring_commit(mux->real_ring);
+ } else {
+ e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+ }
+ spin_unlock(&mux->lock);
+}
+
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry for sw ring\n");
+ return 0;
+ }
+
+ return e->sw_wptr;
+}
+
+/**
+ * amdgpu_ring_mux_get_rptr - get the readptr of the software ring
+ * @mux: the multiplexer the software rings attach to
+ * @ring: the software ring of which we calculate the readptr
+ *
+ * The returned readptr is not precise because other rings may also be writing
+ * to the real ring buffer. Once the real ring has been overwritten we cannot
+ * tell whether our packets have been executed or even read yet. However, this
+ * function is only called by tools such as umr to collect the latest packets
+ * for hang analysis, and we assume the hang happened near our latest submit.
+ * Thus we use the following logic to give a clue:
+ * If the readptr is between start and end, then we return the copy pointer
+ * plus the distance from start to readptr. If the readptr is before start, we
+ * return the copy pointer. Lastly, if the readptr is past end, we return the
+ * write pointer.
+ */
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ u64 readp, offset, start, end;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("no sw entry found!\n");
+ return 0;
+ }
+
+ readp = amdgpu_ring_get_rptr(mux->real_ring);
+
+ start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
+ if (start > end) {
+ if (readp <= end)
+ readp += mux->real_ring->ring_size >> 2;
+ end += mux->real_ring->ring_size >> 2;
+ }
+
+ if (start <= readp && readp <= end) {
+ offset = readp - start;
+ e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
+ } else if (readp < start) {
+ e->sw_rptr = e->sw_cptr;
+ } else {
+ /* end < readptr */
+ e->sw_rptr = e->sw_wptr;
+ }
+
+ return e->sw_rptr;
+}
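+
+/*
+ * A worked example of the logic above, with hypothetical values: if the
+ * chunk was copied to the real ring between start = 0x100 and end = 0x180
+ * and the real rptr reads 0x140, the offset is 0x40, so the reported
+ * sw_rptr is (sw_cptr + 0x40) & ring->buf_mask.
+ */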
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ return amdgpu_ring_mux_get_rptr(mux, ring);
+}
+
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ return amdgpu_ring_mux_get_wptr(mux, ring);
+}
+
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr);
+}
+
+/* Override insert_nop to prevent emitting nops to the software rings */
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ WARN_ON(!ring->is_sw_ring);
+}
+
+const char *amdgpu_sw_ring_name(int idx)
+{
+ return idx < ARRAY_SIZE(sw_ring_info) ?
+ sw_ring_info[idx].ring_name : NULL;
+}
+
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+ return idx < ARRAY_SIZE(sw_ring_info) ?
+ sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
+}
+
+/* Scan for a low priority ring with a long-unsignaled fence while the high priority rings have no fences outstanding. */
+static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_ring *ring;
+ int i, need_preempt;
+
+ need_preempt = 0;
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ ring = mux->ring_entry[i].ring;
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
+ amdgpu_fence_count_emitted(ring) > 0)
+ return 0;
+ if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
+ amdgpu_fence_last_unsignaled_time_us(ring) >
+ AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
+ need_preempt = 1;
+ }
+ return need_preempt && !mux->s_resubmit;
+}
+
+/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. */
+static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+ int r;
+
+ spin_lock(&mux->lock);
+ mux->pending_trailing_fence_signaled = true;
+ r = amdgpu_ring_preempt_ib(mux->real_ring);
+ spin_unlock(&mux->lock);
+ return r;
+}
+
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+ if (amdgpu_mcbp_scan(mux) > 0)
+ amdgpu_mcbp_trigger_preempt(mux);
+ return;
+ }
+
+ amdgpu_ring_mux_start_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+ WARN_ON(!ring->is_sw_ring);
+ if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+ return;
+ amdgpu_ring_mux_end_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+ unsigned offset;
+
+ if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+ return;
+
+ offset = ring->wptr & ring->buf_mask;
+
+ amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
+}
+
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ spin_lock(&mux->lock);
+ amdgpu_mux_resubmit_chunks(mux);
+ spin_unlock(&mux->lock);
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
+ if (!chunk) {
+ DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
+ return;
+ }
+
+ chunk->start = ring->wptr;
+ /* initialize the offsets past the mask so we can tell whether the IB submission set them */
+ chunk->cntl_offset = ring->buf_mask + 1;
+ chunk->de_offset = ring->buf_mask + 1;
+ chunk->ce_offset = ring->buf_mask + 1;
+ list_add_tail(&chunk->entry, &e->list);
+}
+
+static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ uint32_t last_seq = 0;
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk, *tmp;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+
+ list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
+ if (chunk->sync_seq <= last_seq) {
+ list_del(&chunk->entry);
+ kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+ }
+ }
+}
+
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
+ struct amdgpu_ring *ring, u64 offset,
+ enum amdgpu_ring_mux_offset_type type)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+ if (!chunk) {
+ DRM_ERROR("cannot find chunk!\n");
+ return;
+ }
+
+ switch (type) {
+ case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
+ chunk->cntl_offset = offset;
+ break;
+ case AMDGPU_MUX_OFFSET_TYPE_DE:
+ chunk->de_offset = offset;
+ break;
+ case AMDGPU_MUX_OFFSET_TYPE_CE:
+ chunk->ce_offset = offset;
+ break;
+ default:
+ DRM_ERROR("invalid type (%d)\n", type);
+ break;
+ }
+}
+
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_mux_chunk *chunk;
+
+ e = amdgpu_ring_mux_sw_entry(mux, ring);
+ if (!e) {
+ DRM_ERROR("cannot find entry!\n");
+ return;
+ }
+
+ chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+ if (!chunk) {
+ DRM_ERROR("cannot find chunk!\n");
+ return;
+ }
+
+ chunk->end = ring->wptr;
+ chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+ scan_and_remove_signaled_chunk(mux, ring);
+}
+
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
+{
+ struct amdgpu_mux_entry *e;
+ struct amdgpu_ring *ring = NULL;
+ int i;
+
+ if (!mux->pending_trailing_fence_signaled)
+ return false;
+
+ if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
+ return false;
+
+ for (i = 0; i < mux->num_ring_entries; i++) {
+ e = &mux->ring_entry[i];
+ if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+ ring = e->ring;
+ break;
+ }
+ }
+
+ if (!ring) {
+ DRM_ERROR("cannot find low priority ring\n");
+ return false;
+ }
+
+ amdgpu_fence_process(ring);
+ if (amdgpu_fence_count_emitted(ring) > 0) {
+ mux->s_resubmit = true;
+ mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
+ amdgpu_ring_mux_schedule_resubmit(mux);
+ }
+
+ mux->pending_trailing_fence_signaled = false;
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
new file mode 100644
index 000000000000..d3186b570b82
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RING_MUX__
+#define __AMDGPU_RING_MUX__
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include "amdgpu_ring.h"
+
+struct amdgpu_ring;
+
+/**
+ * struct amdgpu_mux_entry - the entry recording a software ring's copying information.
+ * @ring: the pointer to the software ring.
+ * @start_ptr_in_hw_ring: last start location copied to in the hardware ring.
+ * @end_ptr_in_hw_ring: last end location copied to in the hardware ring.
+ * @sw_cptr: the position of the copy pointer in the sw ring.
+ * @sw_rptr: the read pointer in software ring.
+ * @sw_wptr: the write pointer in software ring.
+ * @list: list head for amdgpu_mux_chunk
+ */
+struct amdgpu_mux_entry {
+ struct amdgpu_ring *ring;
+ u64 start_ptr_in_hw_ring;
+ u64 end_ptr_in_hw_ring;
+ u64 sw_cptr;
+ u64 sw_rptr;
+ u64 sw_wptr;
+ struct list_head list;
+};
+
+enum amdgpu_ring_mux_offset_type {
+ AMDGPU_MUX_OFFSET_TYPE_CONTROL,
+ AMDGPU_MUX_OFFSET_TYPE_DE,
+ AMDGPU_MUX_OFFSET_TYPE_CE,
+};
+
+enum ib_complete_status {
+ /* IB not started/reset value, default value. */
+ IB_COMPLETION_STATUS_DEFAULT = 0,
+ /* IB preempted, started but not completed. */
+ IB_COMPLETION_STATUS_PREEMPTED = 1,
+ /* IB completed. */
+ IB_COMPLETION_STATUS_COMPLETED = 2,
+};
+
+struct amdgpu_ring_mux {
+ struct amdgpu_ring *real_ring;
+
+ struct amdgpu_mux_entry *ring_entry;
+ unsigned int num_ring_entries;
+ unsigned int ring_entry_size;
+ /* protects copying data from the different software rings */
+ spinlock_t lock;
+ bool s_resubmit;
+ uint32_t seqno_to_resubmit;
+ u64 wptr_resubmit;
+ struct timer_list resubmit_timer;
+
+ bool pending_trailing_fence_signaled;
+};
+
+/**
+ * struct amdgpu_mux_chunk - records the location of an indirect buffer's packets on the software ring.
+ * @entry: the list entry.
+ * @sync_seq: the fence seqno related to the saved IB.
+ * @start: start location on the software ring.
+ * @end: end location on the software ring.
+ * @cntl_offset: the PRE_RESUME bit position used for resubmission.
+ * @de_offset: the anchor in write_data for the de meta of resubmission.
+ * @ce_offset: the anchor in write_data for the ce meta of resubmission.
+ */
+struct amdgpu_mux_chunk {
+ struct list_head entry;
+ uint32_t sync_seq;
+ u64 start;
+ u64 end;
+ u64 cntl_offset;
+ u64 de_offset;
+ u64 ce_offset;
+};
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ unsigned int entry_size);
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+ u64 offset, enum amdgpu_ring_mux_offset_type type);
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type);
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
new file mode 100644
index 000000000000..5aa830a02d80
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_rlc.h"
+
+/**
+ * amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
+ *
+ * @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Put the RLC into safe mode if it is enabled and not already in safe mode.
+ */
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ if (adev->gfx.rlc.in_safe_mode[xcc_id])
+ return;
+
+ /* if RLC is not enabled, do nothing */
+ if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
+ return;
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = true;
+ }
+}
+
+/**
+ * amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
+ *
+ * @adev: amdgpu_device pointer
+ * @xcc_id: xcc accelerated compute core id
+ *
+ * Take the RLC out of safe mode if it is enabled and currently in safe mode.
+ */
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ if (!(adev->gfx.rlc.in_safe_mode[xcc_id]))
+ return;
+
+ /* if RLC is not enabled, do nothing */
+ if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
+ return;
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id);
+ adev->gfx.rlc.in_safe_mode[xcc_id] = false;
+ }
+}
+
+/**
+ * amdgpu_gfx_rlc_init_sr - Init save restore block
+ *
+ * @adev: amdgpu_device pointer
+ * @dws: the size of save restore block
+ *
+ * Allocate and initialize the RLC save/restore block.
+ * Returns 0 on success or a negative error code if allocation failed.
+ */
+int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
+{
+ const u32 *src_ptr;
+ u32 *dst_ptr;
+ u32 i;
+ int r;
+
+ /* allocate save restore block */
+ r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.save_restore_obj,
+ &adev->gfx.rlc.save_restore_gpu_addr,
+ (void **)&adev->gfx.rlc.sr_ptr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
+ amdgpu_gfx_rlc_fini(adev);
+ return r;
+ }
+
+ /* write the sr buffer */
+ src_ptr = adev->gfx.rlc.reg_list;
+ dst_ptr = adev->gfx.rlc.sr_ptr;
+ for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
+ dst_ptr[i] = cpu_to_le32(src_ptr[i]);
+ amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_rlc_init_csb - Init clear state block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the RLC clear state block.
+ * Returns 0 on success or a negative error code if allocation failed.
+ */
+int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
+{
+ u32 dws;
+ int r;
+
+ /* allocate clear state block */
+ adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
+ r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
+ amdgpu_gfx_rlc_fini(adev);
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_rlc_init_cpt - Init cp table
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the RLC CP table.
+ * Returns 0 on success or a negative error code if allocation failed.
+ */
+int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create cp table bo\n", r);
+ amdgpu_gfx_rlc_fini(adev);
+ return r;
+ }
+
+ /* set up the cp table */
+ amdgpu_gfx_rlc_setup_cp_table(adev);
+ amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+ return 0;
+}
+
+/**
+ * amdgpu_gfx_rlc_setup_cp_table - set up the CP table buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Write the CP firmware jump table data into the CP table.
+ */
+void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ u32 *dst_ptr;
+ int me, i, max_me;
+ u32 bo_offset = 0;
+ u32 table_offset, table_size;
+
+ max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);
+
+ /* write the cp table buffer */
+ dst_ptr = adev->gfx.rlc.cp_table_ptr;
+ for (me = 0; me < max_me; me++) {
+ if (me == 0) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.ce_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 1) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.pfp_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 2) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.me_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 3) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 4) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.mec2_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ }
+
+ for (i = 0; i < table_size; i++) {
+ dst_ptr[bo_offset + i] =
+ cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+ }
+
+ bo_offset += table_size;
+ }
+}
+
+/**
+ * amdgpu_gfx_rlc_fini - Free BO which used for RLC
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the three BOs used for the rlc_save_restore_block, rlc_clear_state_block
+ * and rlc_jump_table_block.
+ */
+void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
+{
+ /* save restore block */
+ if (adev->gfx.rlc.save_restore_obj) {
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj,
+ &adev->gfx.rlc.save_restore_gpu_addr,
+ (void **)&adev->gfx.rlc.sr_ptr);
+ }
+
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *common_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+ unsigned int *tmp;
+ unsigned int i;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
+ le32_to_cpu(rlc_hdr->save_and_restore_offset);
+ adev->gfx.rlc.clear_state_descriptor_offset =
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+ adev->gfx.rlc.avail_scratch_ram_locations =
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+ adev->gfx.rlc.reg_restore_list_size =
+ le32_to_cpu(rlc_hdr->reg_restore_list_size);
+ adev->gfx.rlc.reg_list_format_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_start);
+ adev->gfx.rlc.reg_list_format_separate_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+ adev->gfx.rlc.starting_offsets_start =
+ le32_to_cpu(rlc_hdr->starting_offsets_start);
+ adev->gfx.rlc.reg_list_format_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+ adev->gfx.rlc.reg_list_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+ adev->gfx.rlc.register_list_format =
+ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n");
+ return -ENOMEM;
+ }
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ if (info->fw) {
+ common_hdr = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+ return 0;
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+ adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+ adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+ adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+ adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+ adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+ adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+ adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+ adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+ adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+ le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.save_restore_list_cntl_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_gpm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_srm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
+ adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlc_iram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlcp_ucode_version = le32_to_cpu(rlc_hdr->rlcp_ucode_version);
+ adev->gfx.rlcp_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcp_ucode_feature_version);
+ adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes);
+ adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes);
+
+ adev->gfx.rlcv_ucode_version = le32_to_cpu(rlc_hdr->rlcv_ucode_version);
+ adev->gfx.rlcv_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcv_ucode_feature_version);
+ adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes);
+ adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlcp_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_P;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlcv_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_V;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_4 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor)
+{
+ int err;
+
+ if (version_major < 2) {
+ /* only support rlc_hdr v2.x and onwards */
+ dev_err(adev->dev, "unsupported rlc fw hdr\n");
+ return -EINVAL;
+ }
+
+ /* is_rlc_v2_1 is still used in APU code path */
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
+ err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
+ if (err) {
+ dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
+ return err;
+ }
+
+ if (version_minor >= 1)
+ amdgpu_gfx_rlc_init_microcode_v2_1(adev);
+ if (version_minor >= 2)
+ amdgpu_gfx_rlc_init_microcode_v2_2(adev);
+ if (version_minor == 3)
+ amdgpu_gfx_rlc_init_microcode_v2_3(adev);
+ if (version_minor == 4)
+ amdgpu_gfx_rlc_init_microcode_v2_4(adev);
+
+ return 0;
+}
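+
+/*
+ * A minimal caller sketch (hypothetical, for illustration only): after the
+ * RLC firmware has been requested, an IP block reads the major/minor version
+ * from the common firmware header and passes them in:
+ *
+ *   const struct common_firmware_header *hdr =
+ *           (const struct common_firmware_header *)adev->gfx.rlc_fw->data;
+ *   r = amdgpu_gfx_rlc_init_microcode(adev,
+ *                                     le16_to_cpu(hdr->header_version_major),
+ *                                     le16_to_cpu(hdr->header_version_minor));
+ */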
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
new file mode 100644
index 000000000000..2ce310b31942
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -0,0 +1,363 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RLC_H__
+#define __AMDGPU_RLC_H__
+
+#include "clearstate_defs.h"
+
+#define AMDGPU_MAX_RLC_INSTANCES 8
+
+/* firmware ID used in rlc toc */
+typedef enum _FIRMWARE_ID_ {
+ FIRMWARE_ID_INVALID = 0,
+ FIRMWARE_ID_RLC_G_UCODE = 1,
+ FIRMWARE_ID_RLC_TOC = 2,
+ FIRMWARE_ID_RLCG_SCRATCH = 3,
+ FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ FIRMWARE_ID_RLC_SRM_INDEX_ADDR = 5,
+ FIRMWARE_ID_RLC_SRM_INDEX_DATA = 6,
+ FIRMWARE_ID_RLC_P_UCODE = 7,
+ FIRMWARE_ID_RLC_V_UCODE = 8,
+ FIRMWARE_ID_RLX6_UCODE = 9,
+ FIRMWARE_ID_RLX6_DRAM_BOOT = 10,
+ FIRMWARE_ID_GLOBAL_TAP_DELAYS = 11,
+ FIRMWARE_ID_SE0_TAP_DELAYS = 12,
+ FIRMWARE_ID_SE1_TAP_DELAYS = 13,
+ FIRMWARE_ID_GLOBAL_SE0_SE1_SKEW_DELAYS = 14,
+ FIRMWARE_ID_SDMA0_UCODE = 15,
+ FIRMWARE_ID_SDMA0_JT = 16,
+ FIRMWARE_ID_SDMA1_UCODE = 17,
+ FIRMWARE_ID_SDMA1_JT = 18,
+ FIRMWARE_ID_CP_CE = 19,
+ FIRMWARE_ID_CP_PFP = 20,
+ FIRMWARE_ID_CP_ME = 21,
+ FIRMWARE_ID_CP_MEC = 22,
+ FIRMWARE_ID_CP_MES = 23,
+ FIRMWARE_ID_MES_STACK = 24,
+ FIRMWARE_ID_RLC_SRM_DRAM_SR = 25,
+ FIRMWARE_ID_RLCG_SCRATCH_SR = 26,
+ FIRMWARE_ID_RLCP_SCRATCH_SR = 27,
+ FIRMWARE_ID_RLCV_SCRATCH_SR = 28,
+ FIRMWARE_ID_RLX6_DRAM_SR = 29,
+ FIRMWARE_ID_SDMA0_PG_CONTEXT = 30,
+ FIRMWARE_ID_SDMA1_PG_CONTEXT = 31,
+ FIRMWARE_ID_GLOBAL_MUX_SELECT_RAM = 32,
+ FIRMWARE_ID_SE0_MUX_SELECT_RAM = 33,
+ FIRMWARE_ID_SE1_MUX_SELECT_RAM = 34,
+ FIRMWARE_ID_ACCUM_CTRL_RAM = 35,
+ FIRMWARE_ID_RLCP_CAM = 36,
+ FIRMWARE_ID_RLC_SPP_CAM_EXT = 37,
+ FIRMWARE_ID_MAX = 38,
+} FIRMWARE_ID;
+
+typedef enum _SOC21_FIRMWARE_ID_ {
+ SOC21_FIRMWARE_ID_INVALID = 0,
+ SOC21_FIRMWARE_ID_RLC_G_UCODE = 1,
+ SOC21_FIRMWARE_ID_RLC_TOC = 2,
+ SOC21_FIRMWARE_ID_RLCG_SCRATCH = 3,
+ SOC21_FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ SOC21_FIRMWARE_ID_RLC_P_UCODE = 5,
+ SOC21_FIRMWARE_ID_RLC_V_UCODE = 6,
+ SOC21_FIRMWARE_ID_RLX6_UCODE = 7,
+ SOC21_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
+ SOC21_FIRMWARE_ID_CP_PFP = 13,
+ SOC21_FIRMWARE_ID_CP_ME = 14,
+ SOC21_FIRMWARE_ID_CP_MEC = 15,
+ SOC21_FIRMWARE_ID_RS64_MES_P0 = 16,
+ SOC21_FIRMWARE_ID_RS64_MES_P1 = 17,
+ SOC21_FIRMWARE_ID_RS64_PFP = 18,
+ SOC21_FIRMWARE_ID_RS64_ME = 19,
+ SOC21_FIRMWARE_ID_RS64_MEC = 20,
+ SOC21_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
+ SOC21_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
+ SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
+ SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
+ SOC21_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
+ SOC21_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
+ SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
+ SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
+ SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
+ SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
+ SOC21_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
+ SOC21_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
+ SOC21_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
+ SOC21_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_SR = 35,
+ SOC21_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
+ SOC21_FIRMWARE_ID_MAX = 37
+} SOC21_FIRMWARE_ID;
+
+typedef enum _SOC24_FIRMWARE_ID_ {
+ SOC24_FIRMWARE_ID_INVALID = 0,
+ SOC24_FIRMWARE_ID_RLC_G_UCODE = 1,
+ SOC24_FIRMWARE_ID_RLC_TOC = 2,
+ SOC24_FIRMWARE_ID_RLCG_SCRATCH = 3,
+ SOC24_FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ SOC24_FIRMWARE_ID_RLC_P_UCODE = 5,
+ SOC24_FIRMWARE_ID_RLC_V_UCODE = 6,
+ SOC24_FIRMWARE_ID_RLX6_UCODE = 7,
+ SOC24_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
+ SOC24_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
+ SOC24_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
+ SOC24_FIRMWARE_ID_CP_PFP = 13,
+ SOC24_FIRMWARE_ID_CP_ME = 14,
+ SOC24_FIRMWARE_ID_CP_MEC = 15,
+ SOC24_FIRMWARE_ID_RS64_MES_P0 = 16,
+ SOC24_FIRMWARE_ID_RS64_MES_P1 = 17,
+ SOC24_FIRMWARE_ID_RS64_PFP = 18,
+ SOC24_FIRMWARE_ID_RS64_ME = 19,
+ SOC24_FIRMWARE_ID_RS64_MEC = 20,
+ SOC24_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
+ SOC24_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
+ SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
+ SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
+ SOC24_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
+ SOC24_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
+ SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
+ SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
+ SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
+ SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
+ SOC24_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
+ SOC24_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
+ SOC24_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
+ SOC24_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_SR = 35,
+ SOC24_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
+ SOC24_FIRMWARE_ID_RLCDEBUGLOG = 37,
+ SOC24_FIRMWARE_ID_SRIOV_DEBUG = 38,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_RLC = 39,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_SDMA = 40,
+ SOC24_FIRMWARE_ID_SRIOV_CSA_CP = 41,
+ SOC24_FIRMWARE_ID_UMF_ZONE_PAD = 42,
+ SOC24_FIRMWARE_ID_MAX = 43
+} SOC24_FIRMWARE_ID;
+
+typedef struct _RLC_TABLE_OF_CONTENT {
+ union {
+ unsigned int DW0;
+ struct {
+ unsigned int offset : 25;
+ unsigned int id : 7;
+ };
+ };
+
+ union {
+ unsigned int DW1;
+ struct {
+ unsigned int load_at_boot : 1;
+ unsigned int load_at_vddgfx : 1;
+ unsigned int load_at_reset : 1;
+ unsigned int memory_destination : 2;
+ unsigned int vfflr_image_code : 4;
+ unsigned int load_mode_direct : 1;
+ unsigned int save_for_vddgfx : 1;
+ unsigned int save_for_vfflr : 1;
+ unsigned int reserved : 1;
+ unsigned int signed_source : 1;
+ unsigned int size : 18;
+ };
+ };
+
+ union {
+ unsigned int DW2;
+ struct {
+ unsigned int indirect_addr_reg : 16;
+ unsigned int index : 16;
+ };
+ };
+
+ union {
+ unsigned int DW3;
+ struct {
+ unsigned int indirect_data_reg : 16;
+ unsigned int indirect_start_offset : 16;
+ };
+ };
+} RLC_TABLE_OF_CONTENT;
+
+typedef struct _RLC_TABLE_OF_CONTENT_V2 {
+ union {
+ unsigned int DW0;
+ struct {
+ uint32_t offset : 25;
+ uint32_t id : 7;
+ };
+ };
+
+ union {
+ unsigned int DW1;
+ struct {
+ uint32_t reserved0 : 1;
+ uint32_t reserved1 : 1;
+ uint32_t reserved2 : 1;
+ uint32_t memory_destination : 2;
+ uint32_t vfflr_image_code : 4;
+ uint32_t reserved9 : 1;
+ uint32_t reserved10 : 1;
+ uint32_t reserved11 : 1;
+ uint32_t size_x16 : 1;
+ uint32_t reserved13 : 1;
+ uint32_t size : 18;
+ };
+ };
+} RLC_TABLE_OF_CONTENT_V2;
+
+#define RLC_TOC_MAX_SIZE 64
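+
+/*
+ * Illustrative sketch (an assumption about typical use, not taken from this
+ * file): the RLC TOC buffer is laid out as an array of these entries, so a
+ * consumer can walk it and read the bitfields directly:
+ *
+ *   RLC_TABLE_OF_CONTENT *toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf;
+ *   for (i = 0; i < RLC_TOC_MAX_SIZE && toc[i].id != FIRMWARE_ID_INVALID; i++)
+ *           pr_debug("fw id %u at offset %u, size %u\n",
+ *                    toc[i].id, toc[i].offset, toc[i].size);
+ */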
+
+struct amdgpu_rlc_funcs {
+ bool (*is_rlc_enabled)(struct amdgpu_device *adev);
+ void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id);
+ void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
+ int (*init)(struct amdgpu_device *adev);
+ u32 (*get_csb_size)(struct amdgpu_device *adev);
+
+ /**
+ * @get_csb_buffer: Get the clear state to be put into the hardware.
+ *
+ * The parameter adev is used to get the CS data and other gfx info,
+ * and buffer is the RLC CS pointer
+ *
+ * Sometimes, the user space puts a request to clear the state in the
+ * command buffer; this function provides the clear state that gets put
+ * into the hardware. Note that the driver programs Clear State
+ * Indirect Buffer (CSB) explicitly when it sets up the kernel rings,
+ * and it also provides a pointer to it which is used by the firmware
+ * to load the clear state in some cases.
+ */
+ void (*get_csb_buffer)(struct amdgpu_device *adev, u32 *buffer);
+ int (*get_cp_table_num)(struct amdgpu_device *adev);
+ int (*resume)(struct amdgpu_device *adev);
+ void (*stop)(struct amdgpu_device *adev);
+ void (*reset)(struct amdgpu_device *adev);
+ void (*start)(struct amdgpu_device *adev);
+ void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid);
+ bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
+};
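+
+/*
+ * A minimal sketch of how an IP block is expected to wire these callbacks up
+ * (the gfx_vN_0_* names are hypothetical, for illustration only):
+ *
+ *   static const struct amdgpu_rlc_funcs gfx_vN_0_rlc_funcs = {
+ *           .is_rlc_enabled = gfx_vN_0_rlc_is_enabled,
+ *           .get_csb_size   = gfx_vN_0_get_csb_size,
+ *           .get_csb_buffer = gfx_vN_0_get_csb_buffer,
+ *           .resume         = gfx_vN_0_rlc_resume,
+ *           .stop           = gfx_vN_0_rlc_stop,
+ *   };
+ *   adev->gfx.rlc.funcs = &gfx_vN_0_rlc_funcs;
+ */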
+
+struct amdgpu_rlcg_reg_access_ctrl {
+ uint32_t scratch_reg0;
+ uint32_t scratch_reg1;
+ uint32_t scratch_reg2;
+ uint32_t scratch_reg3;
+ uint32_t grbm_cntl;
+ uint32_t grbm_idx;
+ uint32_t spare_int;
+};
+
+struct amdgpu_rlc {
+ /* for power gating */
+ struct amdgpu_bo *save_restore_obj;
+ uint64_t save_restore_gpu_addr;
+ uint32_t *sr_ptr;
+ const u32 *reg_list;
+ u32 reg_list_size;
+ /* for clear state */
+ struct amdgpu_bo *clear_state_obj;
+ uint64_t clear_state_gpu_addr;
+ uint32_t *cs_ptr;
+ const struct cs_section_def *cs_data;
+ u32 clear_state_size;
+ /* for cp tables */
+ struct amdgpu_bo *cp_table_obj;
+ uint64_t cp_table_gpu_addr;
+ uint32_t *cp_table_ptr;
+ u32 cp_table_size;
+
+ /* safe mode for updating CG/PG state */
+ bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES];
+ const struct amdgpu_rlc_funcs *funcs;
+
+ /* for firmware data */
+ u32 save_and_restore_offset;
+ u32 clear_state_descriptor_offset;
+ u32 avail_scratch_ram_locations;
+ u32 reg_restore_list_size;
+ u32 reg_list_format_start;
+ u32 reg_list_format_separate_start;
+ u32 starting_offsets_start;
+ u32 reg_list_format_size_bytes;
+ u32 reg_list_size_bytes;
+ u32 reg_list_format_direct_reg_list_length;
+ u32 save_restore_list_cntl_size_bytes;
+ u32 save_restore_list_gpm_size_bytes;
+ u32 save_restore_list_srm_size_bytes;
+ u32 rlc_iram_ucode_size_bytes;
+ u32 rlc_dram_ucode_size_bytes;
+ u32 rlcp_ucode_size_bytes;
+ u32 rlcv_ucode_size_bytes;
+ u32 global_tap_delays_ucode_size_bytes;
+ u32 se0_tap_delays_ucode_size_bytes;
+ u32 se1_tap_delays_ucode_size_bytes;
+ u32 se2_tap_delays_ucode_size_bytes;
+ u32 se3_tap_delays_ucode_size_bytes;
+
+ u32 *register_list_format;
+ u32 *register_restore;
+ u8 *save_restore_list_cntl;
+ u8 *save_restore_list_gpm;
+ u8 *save_restore_list_srm;
+ u8 *rlc_iram_ucode;
+ u8 *rlc_dram_ucode;
+ u8 *rlcp_ucode;
+ u8 *rlcv_ucode;
+ u8 *global_tap_delays_ucode;
+ u8 *se0_tap_delays_ucode;
+ u8 *se1_tap_delays_ucode;
+ u8 *se2_tap_delays_ucode;
+ u8 *se3_tap_delays_ucode;
+
+ bool is_rlc_v2_1;
+
+ /* for rlc autoload */
+ struct amdgpu_bo *rlc_autoload_bo;
+ u64 rlc_autoload_gpu_addr;
+ void *rlc_autoload_ptr;
+
+ /* rlc toc buffer */
+ struct amdgpu_bo *rlc_toc_bo;
+ uint64_t rlc_toc_gpu_addr;
+ void *rlc_toc_buf;
+
+ bool rlcg_reg_access_supported;
+ /* registers for rlcg indirect reg access */
+ struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl[AMDGPU_MAX_RLC_INSTANCES];
+};
+
+void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id);
+void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id);
+int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
+int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
+int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
+void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
+void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
+ uint16_t version_major,
+ uint16_t version_minor);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
new file mode 100644
index 000000000000..39070b2a4c04
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ */
+/* Algorithm:
+ *
+ * We store the last allocated bo in "hole" and always try to allocate
+ * right after it. The principle is that in a linear GPU ring
+ * progression, what is after "last" is the oldest bo we allocated and thus
+ * the first one that should no longer be in use by the GPU.
+ *
+ * If that is not the case, we skip over the bo after "last" to the closest
+ * done bo, if one exists. If none exists and we are not asked to
+ * block, we report failure to allocate.
+ *
+ * If we are asked to block, we wait on the oldest fences of all
+ * rings and return as soon as any of them completes.
+ */
+
+#include "amdgpu.h"
+
+int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager,
+ unsigned int size, u32 suballoc_align, u32 domain)
+{
+ int r;
+
+ r = amdgpu_bo_create_kernel(adev, size, AMDGPU_GPU_PAGE_SIZE, domain,
+ &sa_manager->bo, &sa_manager->gpu_addr,
+ &sa_manager->cpu_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
+ return r;
+ }
+
+ memset(sa_manager->cpu_ptr, 0, size);
+ drm_suballoc_manager_init(&sa_manager->base, size, suballoc_align);
+ return r;
+}
+
+void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
+ struct amdgpu_sa_manager *sa_manager)
+{
+ if (sa_manager->bo == NULL) {
+ dev_err(adev->dev, "no bo for sa manager\n");
+ return;
+ }
+
+ drm_suballoc_manager_fini(&sa_manager->base);
+
+ amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
+}
+
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+ struct drm_suballoc **sa_bo,
+ unsigned int size)
+{
+ struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
+ GFP_KERNEL, false, 0);
+
+ if (IS_ERR(sa)) {
+ *sa_bo = NULL;
+
+ return PTR_ERR(sa);
+ }
+
+ *sa_bo = sa;
+ return 0;
+}
+
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence)
+{
+ if (sa_bo == NULL || *sa_bo == NULL) {
+ return;
+ }
+
+ drm_suballoc_free(*sa_bo, fence);
+ *sa_bo = NULL;
+}
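+
+/*
+ * A minimal usage sketch (illustrative only, variable names are made up):
+ *
+ *   struct amdgpu_sa_manager mgr;
+ *   struct drm_suballoc *sa;
+ *
+ *   r = amdgpu_sa_bo_manager_init(adev, &mgr, 256 * 1024, 256, AMDGPU_GEM_DOMAIN_GTT);
+ *   r = amdgpu_sa_bo_new(&mgr, &sa, 4096);
+ *   ... use the sub-allocation, then hand back the fence that protects it ...
+ *   amdgpu_sa_bo_free(&sa, fence);
+ *   amdgpu_sa_bo_manager_fini(adev, &mgr);
+ */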
+
+#if defined(CONFIG_DEBUG_FS)
+
+void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
+ struct seq_file *m)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ drm_suballoc_dump_debug_info(&sa_manager->base, &p, sa_manager->gpu_addr);
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
new file mode 100644
index 000000000000..341beec59537
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez <andresx7@gmail.com>
+ */
+
+#include <linux/file.h>
+#include <linux/pid.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_sched.h"
+#include "amdgpu_vm.h"
+
+static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
+ int fd,
+ int32_t priority)
+{
+ CLASS(fd, f)(fd);
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ctx_mgr *mgr;
+ struct amdgpu_ctx *ctx;
+ uint32_t id;
+ int r;
+
+ if (fd_empty(f))
+ return -EINVAL;
+
+ r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
+ if (r)
+ return r;
+
+ mgr = &fpriv->ctx_mgr;
+ mutex_lock(&mgr->lock);
+ idr_for_each_entry(&mgr->ctx_handles, ctx, id)
+ amdgpu_ctx_priority_override(ctx, priority);
+ mutex_unlock(&mgr->lock);
+
+ return 0;
+}
+
+static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
+ int fd,
+ unsigned ctx_id,
+ int32_t priority)
+{
+ CLASS(fd, f)(fd);
+ struct amdgpu_fpriv *fpriv;
+ struct amdgpu_ctx *ctx;
+ int r;
+
+ if (fd_empty(f))
+ return -EINVAL;
+
+ r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
+ if (r)
+ return r;
+
+ ctx = amdgpu_ctx_get(fpriv, ctx_id);
+
+ if (!ctx)
+ return -EINVAL;
+
+ amdgpu_ctx_priority_override(ctx, priority);
+ amdgpu_ctx_put(ctx);
+ return 0;
+}
+
+int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_sched *args = data;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int r;
+
+ /* First check the op, then the op's argument.
+ */
+ switch (args->in.op) {
+ case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
+ case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+ break;
+ default:
+ DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
+ return -EINVAL;
+ }
+
+ if (!amdgpu_ctx_priority_is_valid(args->in.priority)) {
+ WARN(1, "Invalid context priority %d\n", args->in.priority);
+ return -EINVAL;
+ }
+
+ switch (args->in.op) {
+ case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE:
+ r = amdgpu_sched_process_priority_override(adev,
+ args->in.fd,
+ args->in.priority);
+ break;
+ case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+ r = amdgpu_sched_context_priority_override(adev,
+ args->in.fd,
+ args->in.ctx_id,
+ args->in.priority);
+ break;
+ default:
+ /* Impossible.
+ */
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
new file mode 100644
index 000000000000..67e5b2472f6a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez <andresx7@gmail.com>
+ */
+
+#ifndef __AMDGPU_SCHED_H__
+#define __AMDGPU_SCHED_H__
+
+enum drm_sched_priority;
+
+struct drm_device;
+struct drm_file;
+
+int amdgpu_to_sched_priority(int amdgpu_priority,
+ enum drm_sched_priority *prio);
+int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif // __AMDGPU_SCHED_H__
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
new file mode 100644
index 000000000000..8b8a04138711
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -0,0 +1,611 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
+
+#define AMDGPU_CSA_SDMA_SIZE 64
+/* the SDMA CSA resides in the 3rd page of the CSA */
+#define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
+
+/*
+ * GPU SDMA IP block helpers function.
+ */
+
+struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ if (ring == &adev->sdma.instance[i].ring ||
+ ring == &adev->sdma.instance[i].page)
+ return &adev->sdma.instance[i];
+
+ return NULL;
+}
+
+int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (ring == &adev->sdma.instance[i].ring ||
+ ring == &adev->sdma.instance[i].page) {
+ *index = i;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint64_t csa_mc_addr;
+ uint32_t index = 0;
+ int r;
+
+ /* don't enable OS preemption on SDMA under SRIOV */
+ if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
+ return 0;
+
+ r = amdgpu_sdma_get_index_from_ring(ring, &index);
+
+ if (r || index > 31)
+ csa_mc_addr = 0;
+ else
+ csa_mc_addr = amdgpu_csa_vaddr(adev) +
+ AMDGPU_CSA_SDMA_OFFSET +
+ index * AMDGPU_CSA_SDMA_SIZE;
+
+ return csa_mc_addr;
+}
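+
+/*
+ * Worked example (illustrative, assuming preemption is enabled and the ring
+ * maps to SDMA index 2): the returned address is
+ *
+ *   amdgpu_csa_vaddr(adev) + AMDGPU_CSA_SDMA_OFFSET + 2 * AMDGPU_CSA_SDMA_SIZE
+ *     = amdgpu_csa_vaddr(adev) + 8192 + 128
+ */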
+
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq,
+ AMDGPU_SDMA_IRQ_INSTANCE0 + i);
+ if (r)
+ goto late_fini;
+ }
+ }
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry)
+{
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+ if (amdgpu_sriov_vf(adev))
+ return AMDGPU_RAS_SUCCESS;
+
+ amdgpu_ras_reset_gpu(adev);
+
+ return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->sdma.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
+static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
+{
+ uint16_t version_major;
+ const struct common_firmware_header *header = NULL;
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const struct sdma_firmware_header_v2_0 *hdr_v2;
+ const struct sdma_firmware_header_v3_0 *hdr_v3;
+
+ header = (const struct common_firmware_header *)
+ sdma_inst->fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ switch (version_major) {
+ case 1:
+ hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ break;
+ case 2:
+ hdr_v2 = (const struct sdma_firmware_header_v2_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version);
+ break;
+ case 3:
+ hdr_v3 = (const struct sdma_firmware_header_v3_0 *)sdma_inst->fw->data;
+ sdma_inst->fw_version = le32_to_cpu(hdr_v3->header.ucode_version);
+ sdma_inst->feature_version = le32_to_cpu(hdr_v3->ucode_feature_version);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (sdma_inst->feature_version >= 20)
+ sdma_inst->burst_nop = true;
+
+ return 0;
+}
+
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+ if (duplicate)
+ break;
+ }
+
+ memset((void *)adev->sdma.instance, 0,
+ sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);
+}
+
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
+ u32 instance, bool duplicate)
+{
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ int err, i;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr;
+ const struct sdma_firmware_header_v3_0 *sdma_hv3;
+ uint16_t version_major;
+ char ucode_prefix[30];
+
+ amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));
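+ /* e.g. for a (hypothetical) prefix "sdma": instance 0 loads amdgpu/sdma.bin,
+  * instance 1 loads amdgpu/sdma1.bin
+  */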
+ if (instance == 0)
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s%d.bin", ucode_prefix, instance);
+ if (err)
+ goto out;
+
+ header = (const struct common_firmware_header *)
+ adev->sdma.instance[instance].fw->data;
+ version_major = le16_to_cpu(header->header_version_major);
+
+ if ((duplicate && instance) || (!duplicate && version_major > 1)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = amdgpu_sdma_init_inst_ctx(&adev->sdma.instance[instance]);
+ if (err)
+ goto out;
+
+ if (duplicate) {
+ for (i = 1; i < adev->sdma.num_instances; i++)
+ memcpy((void *)&adev->sdma.instance[i],
+ (void *)&adev->sdma.instance[0],
+ sizeof(struct amdgpu_sdma_instance));
+ }
+
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ switch (version_major) {
+ case 1:
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!duplicate && (instance != i))
+ continue;
+ else {
+ /* Use a single copy per SDMA firmware type. PSP uses the same instance for all
+ * groups of SDMAs */
+ if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 5)) &&
+ adev->firmware.load_type ==
+ AMDGPU_FW_LOAD_PSP &&
+ adev->sdma.num_inst_per_aid == i) {
+ break;
+ }
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+ break;
+ case 2:
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH0];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH0;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_UCODE_TH1];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_UCODE_TH1;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+ break;
+ case 3:
+ sdma_hv3 = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_RS64];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA_RS64;
+ info->fw = adev->sdma.instance[0].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(sdma_hv3->ucode_size_bytes), PAGE_SIZE);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ }
+
+out:
+ if (err)
+ amdgpu_sdma_destroy_inst_ctx(adev, duplicate);
+ return err;
+}
+
+int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err = 0;
+ struct amdgpu_sdma_ras *ras = NULL;
+
+ /* If adev->sdma.ras is NULL, sdma does not support ras,
+ * so there is nothing to do here.
+ */
+ if (!adev->sdma.ras)
+ return 0;
+
+ ras = adev->sdma.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register sdma ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "sdma");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__SDMA;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->sdma.ras_if = &ras->ras_block.ras_comm;
+
+ /* If no IP-specific ras_late_init function is defined, use the default one */
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init;
+
+ /* If no IP-specific ras_cb function is defined, use the default one */
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb;
+
+ return 0;
+}
+
+/*
+ * debugfs interface to enable/disable SDMA job submission to a specific ring.
+ */
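+/*
+ * Bit layout of the mask (derived from the set/get handlers below): each SDMA
+ * instance i owns bit (i * num_ring) for its gfx ring and, when a page queue
+ * exists, bit (i * num_ring + 1) for its page ring. Example usage (the debugfs
+ * path is illustrative):
+ *
+ *   cat  /sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask
+ *   echo 0x3 > /sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask
+ */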
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u64 i, num_ring;
+ u64 mask = 0;
+ struct amdgpu_ring *ring, *page = NULL;
+
+ if (!adev)
+ return -ENODEV;
+
+ /* Determine the number of rings per SDMA instance
+ * (1 for sdma gfx ring, 2 if page queue exists)
+ */
+ if (adev->sdma.has_page_queue)
+ num_ring = 2;
+ else
+ num_ring = 1;
+
+ /* Calculate the maximum possible mask value
+ * based on the number of SDMA instances and rings
+ */
+ mask = BIT_ULL(adev->sdma.num_instances * num_ring) - 1;
+
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->sdma.num_instances; ++i) {
+ ring = &adev->sdma.instance[i].ring;
+ if (adev->sdma.has_page_queue)
+ page = &adev->sdma.instance[i].page;
+ if (val & BIT_ULL(i * num_ring))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+
+ if (page) {
+ if (val & BIT_ULL(i * num_ring + 1))
+ page->sched.ready = true;
+ else
+ page->sched.ready = false;
+ }
+ }
+ /* make the sched.ready flag updates visible across SMP immediately */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u64 i, num_ring;
+ u64 mask = 0;
+ struct amdgpu_ring *ring, *page = NULL;
+
+ if (!adev)
+ return -ENODEV;
+
+ /* Determine the number of rings per SDMA instance
+ * (1 for sdma gfx ring, 2 if page queue exists)
+ */
+ if (adev->sdma.has_page_queue)
+ num_ring = 2;
+ else
+ num_ring = 1;
+
+ for (i = 0; i < adev->sdma.num_instances; ++i) {
+ ring = &adev->sdma.instance[i].ring;
+ if (adev->sdma.has_page_queue)
+ page = &adev->sdma.instance[i].page;
+
+ if (ring->sched.ready)
+ mask |= BIT_ULL(i * num_ring);
+ else
+ mask &= ~BIT_ULL(i * num_ring);
+
+ if (page) {
+ if (page->sched.ready)
+ mask |= BIT_ULL(i * num_ring + 1);
+ else
+ mask &= ~BIT_ULL(i * num_ring + 1);
+ }
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_sdma_sched_mask_fops,
+ amdgpu_debugfs_sdma_sched_mask_get,
+ amdgpu_debugfs_sdma_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->sdma.num_instances > 1))
+ return;
+ sprintf(name, "amdgpu_sdma_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_sdma_sched_mask_fops);
+#endif
+}
+
+static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->sdma.supported_reset);
+}
+
+static DEVICE_ATTR(sdma_reset_mask, 0444,
+ amdgpu_get_sdma_reset_mask, NULL);
+
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (!amdgpu_gpu_recovery)
+ return r;
+
+ if (adev->sdma.num_instances) {
+ r = device_create_file(adev->dev, &dev_attr_sdma_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->dev->kobj.sd) {
+ if (adev->sdma.num_instances)
+ device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
+ }
+}
+
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->sdma.has_page_queue &&
+ (ring->me < adev->sdma.num_instances) &&
+ (ring == &adev->sdma.instance[ring->me].ring))
+ return &adev->sdma.instance[ring->me].page;
+ else
+ return NULL;
+}
+
+/**
+ * amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine
+ * @adev: Pointer to the AMDGPU device structure
+ * @ring: Pointer to the ring structure to check
+ *
+ * This function checks if the given ring is an SDMA ring that shares a VM invalidation engine.
+ * It returns true if the ring is such an SDMA ring, false otherwise.
+ */
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ int i = ring->me;
+
+ if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
+ return false;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ return (ring == &adev->sdma.instance[i].page);
+ else
+ return false;
+}
+
+static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
+{
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+
+ if (sdma_instance->funcs->soft_reset_kernel_queue)
+ return sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id);
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * amdgpu_sdma_reset_engine - Reset a specific SDMA engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: Logical ID of the SDMA engine instance to reset
+ * @caller_handles_kernel_queues: Skip kernel queue processing. Caller
+ * will handle it.
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
+ bool caller_handles_kernel_queues)
+{
+ int ret = 0;
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+ struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+ struct amdgpu_ring *page_ring = &sdma_instance->page;
+
+ mutex_lock(&sdma_instance->engine_reset_mutex);
+
+ if (!caller_handles_kernel_queues) {
+ /* Stop the scheduler's work queue for the GFX and page rings if they are running.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ drm_sched_wqueue_stop(&gfx_ring->sched);
+
+ if (adev->sdma.has_page_queue)
+ drm_sched_wqueue_stop(&page_ring->sched);
+ }
+
+ if (sdma_instance->funcs->stop_kernel_queue) {
+ sdma_instance->funcs->stop_kernel_queue(gfx_ring);
+ if (adev->sdma.has_page_queue)
+ sdma_instance->funcs->stop_kernel_queue(page_ring);
+ }
+
+ /* Perform the SDMA reset for the specified instance */
+ ret = amdgpu_sdma_soft_reset(adev, instance_id);
+ if (ret) {
+ dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id);
+ goto exit;
+ }
+
+ if (sdma_instance->funcs->start_kernel_queue) {
+ sdma_instance->funcs->start_kernel_queue(gfx_ring);
+ if (adev->sdma.has_page_queue)
+ sdma_instance->funcs->start_kernel_queue(page_ring);
+ }
+
+exit:
+ if (!caller_handles_kernel_queues) {
+ /* Restart the scheduler's work queue for the GFX and page rings
+ * if they were stopped by this function. This allows new tasks
+ * to be submitted to the queues after the reset is complete.
+ */
+ if (!ret) {
+ amdgpu_fence_driver_force_completion(gfx_ring);
+ drm_sched_wqueue_start(&gfx_ring->sched);
+ if (adev->sdma.has_page_queue) {
+ amdgpu_fence_driver_force_completion(page_ring);
+ drm_sched_wqueue_start(&page_ring->sched);
+ }
+ }
+ }
+ mutex_unlock(&sdma_instance->engine_reset_mutex);
+
+ return ret;
+}
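+
+/*
+ * A minimal caller sketch (hypothetical, for illustration): an IP-specific
+ * queue-reset handler that lets this helper stop and restart the kernel
+ * queues itself would simply do
+ *
+ *   r = amdgpu_sdma_reset_engine(adev, instance_id, false);
+ *
+ * while a caller that has already quiesced the kernel queues passes true so
+ * the scheduler work queues are left alone.
+ */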
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
new file mode 100644
index 000000000000..34311f32be4c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SDMA_H__
+#define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
+
+/* max number of IP instances */
+#define AMDGPU_MAX_SDMA_INSTANCES 16
+
+enum amdgpu_sdma_irq {
+ AMDGPU_SDMA_IRQ_INSTANCE0 = 0,
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_SDMA_IRQ_INSTANCE2,
+ AMDGPU_SDMA_IRQ_INSTANCE3,
+ AMDGPU_SDMA_IRQ_INSTANCE4,
+ AMDGPU_SDMA_IRQ_INSTANCE5,
+ AMDGPU_SDMA_IRQ_INSTANCE6,
+ AMDGPU_SDMA_IRQ_INSTANCE7,
+ AMDGPU_SDMA_IRQ_INSTANCE8,
+ AMDGPU_SDMA_IRQ_INSTANCE9,
+ AMDGPU_SDMA_IRQ_INSTANCE10,
+ AMDGPU_SDMA_IRQ_INSTANCE11,
+ AMDGPU_SDMA_IRQ_INSTANCE12,
+ AMDGPU_SDMA_IRQ_INSTANCE13,
+ AMDGPU_SDMA_IRQ_INSTANCE14,
+ AMDGPU_SDMA_IRQ_INSTANCE15,
+ AMDGPU_SDMA_IRQ_LAST
+};
+
+#define NUM_SDMA(x) hweight32(x)
+
+struct amdgpu_sdma_funcs {
+ int (*stop_kernel_queue)(struct amdgpu_ring *ring);
+ int (*start_kernel_queue)(struct amdgpu_ring *ring);
+ int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id);
+};
+
+struct amdgpu_sdma_instance {
+ /* SDMA firmware */
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_ring ring;
+ struct amdgpu_ring page;
+ bool burst_nop;
+ uint32_t aid_id;
+
+ struct amdgpu_bo *sdma_fw_obj;
+ uint64_t sdma_fw_gpu_addr;
+ uint32_t *sdma_fw_ptr;
+ struct mutex engine_reset_mutex;
+ /* track guilty state of GFX and PAGE queues */
+ bool gfx_guilty;
+ bool page_guilty;
+ const struct amdgpu_sdma_funcs *funcs;
+};
+
+enum amdgpu_sdma_ras_memory_id {
+ AMDGPU_SDMA_MBANK_DATA_BUF0 = 1,
+ AMDGPU_SDMA_MBANK_DATA_BUF1 = 2,
+ AMDGPU_SDMA_MBANK_DATA_BUF2 = 3,
+ AMDGPU_SDMA_MBANK_DATA_BUF3 = 4,
+ AMDGPU_SDMA_MBANK_DATA_BUF4 = 5,
+ AMDGPU_SDMA_MBANK_DATA_BUF5 = 6,
+ AMDGPU_SDMA_MBANK_DATA_BUF6 = 7,
+ AMDGPU_SDMA_MBANK_DATA_BUF7 = 8,
+ AMDGPU_SDMA_MBANK_DATA_BUF8 = 9,
+ AMDGPU_SDMA_MBANK_DATA_BUF9 = 10,
+ AMDGPU_SDMA_MBANK_DATA_BUF10 = 11,
+ AMDGPU_SDMA_MBANK_DATA_BUF11 = 12,
+ AMDGPU_SDMA_MBANK_DATA_BUF12 = 13,
+ AMDGPU_SDMA_MBANK_DATA_BUF13 = 14,
+ AMDGPU_SDMA_MBANK_DATA_BUF14 = 15,
+ AMDGPU_SDMA_MBANK_DATA_BUF15 = 16,
+ AMDGPU_SDMA_UCODE_BUF = 17,
+ AMDGPU_SDMA_RB_CMD_BUF = 18,
+ AMDGPU_SDMA_IB_CMD_BUF = 19,
+ AMDGPU_SDMA_UTCL1_RD_FIFO = 20,
+ AMDGPU_SDMA_UTCL1_RDBST_FIFO = 21,
+ AMDGPU_SDMA_UTCL1_WR_FIFO = 22,
+ AMDGPU_SDMA_DATA_LUT_FIFO = 23,
+ AMDGPU_SDMA_SPLIT_DAT_BUF = 24,
+ AMDGPU_SDMA_MEMORY_BLOCK_LAST,
+};
+
+struct amdgpu_sdma_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_sdma {
+ struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+ struct amdgpu_irq_src trap_irq;
+ struct amdgpu_irq_src illegal_inst_irq;
+ struct amdgpu_irq_src fence_irq;
+ struct amdgpu_irq_src ecc_irq;
+ struct amdgpu_irq_src vm_hole_irq;
+ struct amdgpu_irq_src doorbell_invalid_irq;
+ struct amdgpu_irq_src pool_timeout_irq;
+ struct amdgpu_irq_src srbm_write_irq;
+ struct amdgpu_irq_src ctxt_empty_irq;
+
+ int num_instances;
+ uint32_t sdma_mask;
+ int num_inst_per_aid;
+ uint32_t srbm_soft_reset;
+ bool has_page_queue;
+ struct ras_common_if *ras_if;
+ struct amdgpu_sdma_ras *ras;
+ uint32_t *ip_dump;
+ uint32_t supported_reset;
+ struct list_head reset_callback_list;
+ bool no_user_submission;
+ bool disable_uq;
+};
+
+/*
+ * Provided by hw blocks that can move/clear data, e.g. gfx or sdma.
+ * But currently we only use sdma to move data.
+ */
+struct amdgpu_buffer_funcs {
+ /* maximum bytes in a single operation */
+ uint32_t copy_max_bytes;
+
+ /* number of dw to reserve per operation */
+ unsigned copy_num_dw;
+
+ /* used for buffer migration */
+ void (*emit_copy_buffer)(struct amdgpu_ib *ib,
+ /* src addr in bytes */
+ uint64_t src_offset,
+ /* dst addr in bytes */
+ uint64_t dst_offset,
+ /* number of byte to transfer */
+ uint32_t byte_count,
+ uint32_t copy_flags);
+
+ /* maximum bytes in a single operation */
+ uint32_t fill_max_bytes;
+
+ /* number of dw to reserve per operation */
+ unsigned fill_num_dw;
+
+ /* used for buffer clearing */
+ void (*emit_fill_buffer)(struct amdgpu_ib *ib,
+ /* value to write to memory */
+ uint32_t src_data,
+ /* dst addr in bytes */
+ uint64_t dst_offset,
+ /* number of byte to fill */
+ uint32_t byte_count);
+};
+
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id,
+ bool caller_handles_kernel_queues);
+
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
+#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
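+
+/*
+ * Illustrative sketch only (not part of the original patch): how a caller
+ * might emit a buffer copy through the wrapper above. "ib", "src", "dst" and
+ * "nbytes" are hypothetical locals; the IB is assumed to already have at
+ * least copy_num_dw dwords reserved, and 0 stands in for "no special copy
+ * flags":
+ *
+ *	if (nbytes <= adev->mman.buffer_funcs->copy_max_bytes)
+ *		amdgpu_emit_copy_buffer(adev, ib, src, dst, nbytes, 0);
+ */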
+
+struct amdgpu_sdma_instance *
+amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
+int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
+uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
+int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev,
+ void *err_data,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
+ bool duplicate);
+void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
+ bool duplicate);
+int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
new file mode 100644
index 000000000000..3739be1b71e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_securedisplay.h"
+
+/**
+ * DOC: AMDGPU SECUREDISPLAY debugfs test interface
+ *
+ * How to use:
+ * echo opcode <value> > <debugfs_dir>/dri/xxx/securedisplay_test
+ * e.g. echo 1 > <debugfs_dir>/dri/xxx/securedisplay_test
+ * e.g. echo 2 phy_id > <debugfs_dir>/dri/xxx/securedisplay_test
+ *
+ * opcode:
+ * 1: Query whether the TA is responding; used only for validation purposes.
+ * 2: Send Region of Interest and CRC value to I2C. (uint32)phy_id is
+ * sent to determine which DIO scratch register should be used to get
+ * the ROI and to receive i2c_buf as the output.
+ *
+ * Refer to the header file ta_securedisplay_if.h for more details.
+ *
+ */
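+
+/*
+ * Example (illustrative only, not from the original patch): assuming debugfs
+ * is mounted at /sys/kernel/debug and the device is DRM minor 0, the two
+ * opcodes above could be exercised with:
+ *
+ *	echo 1 > /sys/kernel/debug/dri/0/securedisplay_test
+ *	echo 2 0 > /sys/kernel/debug/dri/0/securedisplay_test
+ *
+ * Results are reported through dev_info()/dev_err() in the kernel log.
+ */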
+
+void psp_securedisplay_parse_resp_status(struct psp_context *psp,
+ enum ta_securedisplay_status status)
+{
+ switch (status) {
+ case TA_SECUREDISPLAY_STATUS__SUCCESS:
+ break;
+ case TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE:
+ dev_err(psp->adev->dev, "Secure display: Generic Failure.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__INVALID_PARAMETER:
+ dev_err(psp->adev->dev, "Secure display: Invalid Parameter.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__NULL_POINTER:
+ dev_err(psp->adev->dev, "Secure display: Null Pointer.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__I2C_WRITE_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to write to I2C.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__READ_DIO_SCRATCH_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to Read DIO Scratch Register.");
+ break;
+ case TA_SECUREDISPLAY_STATUS__READ_CRC_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to Read CRC");
+ break;
+ case TA_SECUREDISPLAY_STATUS__I2C_INIT_ERROR:
+ dev_err(psp->adev->dev, "Secure display: Failed to initialize I2C.");
+ break;
+ default:
+ dev_err(psp->adev->dev, "Secure display: Failed to parse status: %d\n", status);
+ }
+}
+
+void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct ta_securedisplay_cmd **cmd,
+ enum ta_securedisplay_command command_id)
+{
+ *cmd = (struct ta_securedisplay_cmd *)psp->securedisplay_context.context.mem_context.shared_buf;
+ memset(*cmd, 0, sizeof(struct ta_securedisplay_cmd));
+ (*cmd)->status = TA_SECUREDISPLAY_STATUS__GENERIC_FAILURE;
+ (*cmd)->cmd_id = command_id;
+}
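+
+/*
+ * Illustrative sketch only (not part of the original patch): the typical
+ * call pattern for the helpers above, mirroring the debugfs handler below.
+ * "psp" is assumed to be a valid psp_context with an initialized
+ * securedisplay context:
+ *
+ *	struct ta_securedisplay_cmd *cmd;
+ *	int ret;
+ *
+ *	mutex_lock(&psp->securedisplay_context.mutex);
+ *	psp_prep_securedisplay_cmd_buf(psp, &cmd, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+ *	ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+ *	if (!ret && cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS)
+ *		psp_securedisplay_parse_resp_status(psp, cmd->status);
+ *	mutex_unlock(&psp->securedisplay_context.mutex);
+ */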
+
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_securedisplay_cmd *securedisplay_cmd;
+ struct drm_device *dev = adev_to_drm(adev);
+ uint32_t phy_id;
+ uint32_t op;
+ char str[64];
+ int ret;
+
+ if (*pos || size > sizeof(str) - 1)
+ return -EINVAL;
+
+ memset(str, 0, sizeof(str));
+ ret = copy_from_user(str, buf, size);
+ if (ret)
+ return -EFAULT;
+
+ ret = pm_runtime_get_sync(dev->dev);
+ if (ret < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return ret;
+ }
+
+ if (size < 3)
+ sscanf(str, "%u ", &op);
+ else
+ sscanf(str, "%u %u", &op, &phy_id);
+
+ switch (op) {
+ case 1:
+ mutex_lock(&psp->securedisplay_context.mutex);
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+ if (!ret) {
+ if (securedisplay_cmd->status == TA_SECUREDISPLAY_STATUS__SUCCESS)
+ dev_info(adev->dev, "SECUREDISPLAY: query securedisplay TA ret is 0x%X\n",
+ securedisplay_cmd->securedisplay_out_message.query_ta.query_cmd_ret);
+ else
+ psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
+ }
+ mutex_unlock(&psp->securedisplay_context.mutex);
+ break;
+ case 2:
+ if (size < 3 || phy_id >= TA_SECUREDISPLAY_MAX_PHY) {
+ dev_err(adev->dev, "Invalid input: %s\n", str);
+ pm_runtime_put_autosuspend(dev->dev);
+ return -EINVAL;
+ }
+ mutex_lock(&psp->securedisplay_context.mutex);
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_id;
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ if (!ret) {
+ if (securedisplay_cmd->status == TA_SECUREDISPLAY_STATUS__SUCCESS) {
+ dev_info(adev->dev, "SECUREDISPLAY: I2C buffer out put is: %*ph\n",
+ TA_SECUREDISPLAY_I2C_BUFFER_SIZE,
+ securedisplay_cmd->securedisplay_out_message.send_roi_crc.i2c_buf);
+ } else {
+ psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
+ }
+ }
+ mutex_unlock(&psp->securedisplay_context.mutex);
+ break;
+ default:
+ dev_err(adev->dev, "Invalid input: %s\n", str);
+ }
+
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return size;
+}
+
+static const struct file_operations amdgpu_securedisplay_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_securedisplay_debugfs_write,
+ .llseek = default_llseek
+};
+
+#endif
+
+void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+
+ if (!adev->psp.securedisplay_context.context.initialized)
+ return;
+
+ debugfs_create_file("securedisplay_test", S_IWUSR, adev_to_drm(adev)->primary->debugfs_root,
+ adev, &amdgpu_securedisplay_debugfs_ops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
new file mode 100644
index 000000000000..456ad68ed4b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef _AMDGPU_SECUREDISPLAY_H
+#define _AMDGPU_SECUREDISPLAY_H
+
+#include "amdgpu.h"
+#include "ta_secureDisplay_if.h"
+
+void amdgpu_securedisplay_debugfs_init(struct amdgpu_device *adev);
+void psp_securedisplay_parse_resp_status(struct psp_context *psp,
+ enum ta_securedisplay_status status);
+void psp_prep_securedisplay_cmd_buf(struct psp_context *psp, struct ta_securedisplay_cmd **cmd,
+ enum ta_securedisplay_command command_id);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
new file mode 100644
index 000000000000..a0b479d5fff1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_seq64.h"
+
+#include <drm/drm_exec.h>
+
+/**
+ * DOC: amdgpu_seq64
+ *
+ * amdgpu_seq64 allocates a 64-bit memory slot for each request, in sequence
+ * order. The seq64 driver is required for user queue fence memory allocation,
+ * TLB counters and VM updates. It provides a maximum of 32768 64-bit slots.
+ */
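+
+/*
+ * Illustrative sketch only (not part of the original patch): a typical
+ * allocate/use/free cycle for a single seq64 slot. "adev" is assumed to be
+ * a valid amdgpu_device and the slot value is written through the returned
+ * CPU address:
+ *
+ *	u64 va, gpu_addr, *cpu_addr;
+ *
+ *	if (!amdgpu_seq64_alloc(adev, &va, &gpu_addr, &cpu_addr)) {
+ *		*cpu_addr = 0;
+ *		amdgpu_seq64_free(adev, va);
+ *	}
+ */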
+
+/**
+ * amdgpu_seq64_get_va_base - Get the seq64 va base address
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns:
+ * va base address on success
+ */
+static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
+{
+ u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev);
+
+ addr = amdgpu_gmc_sign_extend(addr);
+
+ return addr;
+}
+
+/**
+ * amdgpu_seq64_map - Map the seq64 memory to VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: vm pointer
+ * @bo_va: bo_va pointer
+ *
+ * Map the seq64 memory to the given VM.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va)
+{
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ u64 seq64_addr;
+ int r;
+
+ bo = adev->seq64.sbo;
+ if (!bo)
+ return -EINVAL;
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+ if (!*bo_va) {
+ r = -ENOMEM;
+ goto error;
+ }
+
+ seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0,
+ AMDGPU_VA_RESERVED_SEQ64_SIZE,
+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
+ if (r) {
+ DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
+ }
+
+ r = amdgpu_vm_bo_update(adev, *bo_va, false);
+ if (r) {
+ DRM_ERROR("failed to do vm_bo_update on userq sem\n");
+ amdgpu_vm_bo_del(adev, *bo_va);
+ goto error;
+ }
+
+error:
+ drm_exec_fini(&exec);
+ return r;
+}
+
+/**
+ * amdgpu_seq64_unmap - Unmap the seq64 memory
+ *
+ * @adev: amdgpu_device pointer
+ * @fpriv: DRM file private
+ *
+ * Unmap the seq64 memory from the given VM.
+ */
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
+{
+ struct amdgpu_vm *vm;
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ int r;
+
+ if (!fpriv->seq64_va)
+ return;
+
+ bo = adev->seq64.sbo;
+ if (!bo)
+ return;
+
+ vm = &fpriv->vm;
+
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r))
+ goto error;
+ }
+
+ amdgpu_vm_bo_del(adev, fpriv->seq64_va);
+
+ fpriv->seq64_va = NULL;
+
+error:
+ drm_exec_fini(&exec);
+}
+
+/**
+ * amdgpu_seq64_alloc - Allocate a 64-bit memory slot
+ *
+ * @adev: amdgpu_device pointer
+ * @va: VA to access the seq in process address space
+ * @gpu_addr: GPU address to access the seq
+ * @cpu_addr: CPU address to access the seq
+ *
+ * Allocate a 64-bit memory slot from the seq64 pool.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va,
+ u64 *gpu_addr, u64 **cpu_addr)
+{
+ unsigned long bit_pos;
+
+ bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem);
+ if (bit_pos >= adev->seq64.num_sem)
+ return -ENOSPC;
+
+ __set_bit(bit_pos, adev->seq64.used);
+
+ *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev);
+
+ if (gpu_addr)
+ *gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr;
+
+ *cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
+
+ return 0;
+}
+
+/**
+ * amdgpu_seq64_free - Free the given 64-bit memory slot
+ *
+ * @adev: amdgpu_device pointer
+ * @va: gpu start address to be freed
+ *
+ * Free the given 64-bit memory slot back to the seq64 pool.
+ */
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va)
+{
+ unsigned long bit_pos;
+
+ bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64);
+ if (bit_pos < adev->seq64.num_sem)
+ __clear_bit(bit_pos, adev->seq64.used);
+}
+
+/**
+ * amdgpu_seq64_fini - Cleanup seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Free the memory space allocated for seq64.
+ *
+ */
+void amdgpu_seq64_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->seq64.sbo,
+ NULL,
+ (void **)&adev->seq64.cpu_base_addr);
+}
+
+/**
+ * amdgpu_seq64_init - Initialize seq64 driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate the required memory space for seq64.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure
+ */
+int amdgpu_seq64_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->seq64.sbo)
+ return 0;
+
+ /*
+ * The BO is AMDGPU_VA_RESERVED_SEQ64_SIZE bytes, i.e.
+ * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64), providing AMDGPU_MAX_SEQ64_SLOTS
+ * 64-bit slots
+ */
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->seq64.sbo, &adev->seq64.gpu_addr,
+ (void **)&adev->seq64.cpu_base_addr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
+ return r;
+ }
+
+ memset(adev->seq64.cpu_base_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE);
+
+ adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS;
+ memset(&adev->seq64.used, 0, sizeof(adev->seq64.used));
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
new file mode 100644
index 000000000000..26a249aaaee1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SEQ64_H__
+#define __AMDGPU_SEQ64_H__
+
+#include "amdgpu_vm.h"
+
+#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_VA_RESERVED_SEQ64_SIZE / sizeof(u64))
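+
+/*
+ * Worked example (illustrative only): with the 32768-slot figure quoted in
+ * the amdgpu_seq64 DOC comment, AMDGPU_VA_RESERVED_SEQ64_SIZE would be
+ * 32768 * sizeof(u64) = 262144 bytes (256 KiB).
+ */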
+
+struct amdgpu_seq64 {
+ struct amdgpu_bo *sbo;
+ u32 num_sem;
+ u64 gpu_addr;
+ u64 *cpu_base_addr;
+ DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
+};
+
+void amdgpu_seq64_fini(struct amdgpu_device *adev);
+int amdgpu_seq64_init(struct amdgpu_device *adev);
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr);
+void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
+int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_va **bo_va);
+void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv);
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
new file mode 100644
index 000000000000..ec9d12f85f39
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_SMUIO_H__
+#define __AMDGPU_SMUIO_H__
+
+enum amdgpu_pkg_type {
+ AMDGPU_PKG_TYPE_APU = 2,
+ AMDGPU_PKG_TYPE_CEM = 3,
+ AMDGPU_PKG_TYPE_OAM = 4,
+ AMDGPU_PKG_TYPE_UNKNOWN,
+};
+
+struct amdgpu_smuio_mcm_config_info {
+ int socket_id;
+ int die_id;
+};
+
+struct amdgpu_smuio_funcs {
+ u32 (*get_rom_index_offset)(struct amdgpu_device *adev);
+ u32 (*get_rom_data_offset)(struct amdgpu_device *adev);
+ void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
+ u32 (*get_die_id)(struct amdgpu_device *adev);
+ u32 (*get_socket_id)(struct amdgpu_device *adev);
+ enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
+ bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
+ u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_smuio {
+ const struct amdgpu_smuio_funcs *funcs;
+};
+
+#endif /* __AMDGPU_SMUIO_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h
new file mode 100644
index 000000000000..f4176cb01790
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_SOCBB_H__
+#define __AMDGPU_SOCBB_H__
+
+struct gpu_info_voltage_scaling_v1_0 {
+ uint32_t state;
+ uint32_t dscclk_mhz;
+ uint32_t dcfclk_mhz;
+ uint32_t socclk_mhz;
+ uint32_t dram_speed_mts;
+ uint32_t fabricclk_mhz;
+ uint32_t dispclk_mhz;
+ uint32_t phyclk_mhz;
+ uint32_t dppclk_mhz;
+};
+
+struct gpu_info_soc_bounding_box_v1_0 {
+ uint32_t sr_exit_time_us;
+ uint32_t sr_enter_plus_exit_time_us;
+ uint32_t urgent_latency_us;
+ uint32_t urgent_latency_pixel_data_only_us;
+ uint32_t urgent_latency_pixel_mixed_with_vm_data_us;
+ uint32_t urgent_latency_vm_data_only_us;
+ uint32_t writeback_latency_us;
+ uint32_t ideal_dram_bw_after_urgent_percent;
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm;
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_vm_only;
+ uint32_t max_avg_sdp_bw_use_normal_percent;
+ uint32_t max_avg_dram_bw_use_normal_percent;
+ uint32_t max_request_size_bytes;
+ uint32_t downspread_percent;
+ uint32_t dram_page_open_time_ns;
+ uint32_t dram_rw_turnaround_time_ns;
+ uint32_t dram_return_buffer_per_channel_bytes;
+ uint32_t dram_channel_width_bytes;
+ uint32_t fabric_datapath_to_dcn_data_return_bytes;
+ uint32_t dcn_downspread_percent;
+ uint32_t dispclk_dppclk_vco_speed_mhz;
+ uint32_t dfs_vco_period_ps;
+ uint32_t urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ uint32_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ uint32_t urgent_out_of_order_return_per_channel_vm_only_bytes;
+ uint32_t round_trip_ping_latency_dcfclk_cycles;
+ uint32_t urgent_out_of_order_return_per_channel_bytes;
+ uint32_t channel_interleave_bytes;
+ uint32_t num_banks;
+ uint32_t num_chans;
+ uint32_t vmm_page_size_bytes;
+ uint32_t dram_clock_change_latency_us;
+ uint32_t writeback_dram_clock_change_latency_us;
+ uint32_t return_bus_width_bytes;
+ uint32_t voltage_override;
+ uint32_t xfc_bus_transport_time_us;
+ uint32_t xfc_xbuf_latency_tolerance_us;
+ uint32_t use_urgent_burst_bw;
+ uint32_t num_states;
+ struct gpu_info_voltage_scaling_v1_0 clock_limits[8];
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
new file mode 100644
index 000000000000..d6ae9974c952
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/dma-fence-chain.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+
+struct amdgpu_sync_entry {
+ struct hlist_node node;
+ struct dma_fence *fence;
+};
+
+static struct kmem_cache *amdgpu_sync_slab;
+
+/**
+ * amdgpu_sync_create - zero init sync object
+ *
+ * @sync: sync object to initialize
+ *
+ * Just clear the sync object for now.
+ */
+void amdgpu_sync_create(struct amdgpu_sync *sync)
+{
+ hash_init(sync->fences);
+}
+
+/**
+ * amdgpu_sync_same_dev - test if fence belongs to us
+ *
+ * @adev: amdgpu device to use for the test
+ * @f: fence to test
+ *
+ * Test if the fence was issued by us.
+ */
+static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
+ struct dma_fence *f)
+{
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+
+ if (s_fence) {
+ struct amdgpu_ring *ring;
+
+ ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
+ return ring->adev == adev;
+ }
+
+ return false;
+}
+
+/**
+ * amdgpu_sync_get_owner - extract the owner of a fence
+ *
+ * @f: fence to get the owner from
+ *
+ * Extract who originally created the fence.
+ */
+static void *amdgpu_sync_get_owner(struct dma_fence *f)
+{
+ struct drm_sched_fence *s_fence;
+ struct amdgpu_amdkfd_fence *kfd_fence;
+
+ if (!f)
+ return AMDGPU_FENCE_OWNER_UNDEFINED;
+
+ s_fence = to_drm_sched_fence(f);
+ if (s_fence)
+ return s_fence->owner;
+
+ kfd_fence = to_amdgpu_amdkfd_fence(f);
+ if (kfd_fence)
+ return AMDGPU_FENCE_OWNER_KFD;
+
+ return AMDGPU_FENCE_OWNER_UNDEFINED;
+}
+
+/**
+ * amdgpu_sync_keep_later - Keep the later fence
+ *
+ * @keep: existing fence to test
+ * @fence: new fence
+ *
+ * Either keep the existing fence or the new one, depending on which one is later.
+ */
+static void amdgpu_sync_keep_later(struct dma_fence **keep,
+ struct dma_fence *fence)
+{
+ if (*keep && dma_fence_is_later(*keep, fence))
+ return;
+
+ dma_fence_put(*keep);
+ *keep = dma_fence_get(fence);
+}
+
+/**
+ * amdgpu_sync_add_later - add the fence to the hash
+ *
+ * @sync: sync object to add the fence to
+ * @f: fence to add
+ *
+ * Tries to add the fence to an existing hash entry. Returns true when an entry
+ * was found, false otherwise.
+ */
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
+{
+ struct amdgpu_sync_entry *e;
+
+ hash_for_each_possible(sync->fences, e, node, f->context) {
+ if (dma_fence_is_signaled(e->fence)) {
+ dma_fence_put(e->fence);
+ e->fence = dma_fence_get(f);
+ return true;
+ }
+
+ if (likely(e->fence->context == f->context)) {
+ amdgpu_sync_keep_later(&e->fence, f);
+ return true;
+ }
+ }
+ return false;
+}
+
+/**
+ * amdgpu_sync_fence - remember to sync to this fence
+ *
+ * @sync: sync object to add fence to
+ * @f: fence to sync to
+ * @flags: memory allocation flags to use when allocating sync entry
+ *
+ * Add the fence to the sync object.
+ */
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags)
+{
+ struct amdgpu_sync_entry *e;
+
+ if (!f)
+ return 0;
+
+ if (amdgpu_sync_add_later(sync, f))
+ return 0;
+
+ e = kmem_cache_alloc(amdgpu_sync_slab, flags);
+ if (!e)
+ return -ENOMEM;
+
+ hash_add(sync->fences, &e->node, f->context);
+ e->fence = dma_fence_get(f);
+ return 0;
+}
+
+/* Determine based on the owner and mode if we should sync to a fence or not */
+static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
+ enum amdgpu_sync_mode mode,
+ void *owner, struct dma_fence *f)
+{
+ void *fence_owner = amdgpu_sync_get_owner(f);
+
+ /* Always sync to moves, no matter what */
+ if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
+ return true;
+
+ /* We only want to trigger KFD eviction fences on
+ * evict or move jobs. Skip KFD fences otherwise.
+ */
+ if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ return false;
+
+ /* Never sync to VM updates either. */
+ if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
+ owner != AMDGPU_FENCE_OWNER_KFD)
+ return false;
+
+ /* Ignore fences depending on the sync mode */
+ switch (mode) {
+ case AMDGPU_SYNC_ALWAYS:
+ return true;
+
+ case AMDGPU_SYNC_NE_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner == owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EQ_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner != owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EXPLICIT:
+ return false;
+ }
+
+ WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+ "Adding eviction fence to sync obj");
+ return true;
+}
+
+/**
+ * amdgpu_sync_resv - sync to a reservation object
+ *
+ * @adev: amdgpu device
+ * @sync: sync object to add fences from reservation object to
+ * @resv: reservation object with embedded fence
+ * @mode: how owner affects which fences we sync to
+ * @owner: owner of the planned job submission
+ *
+ * Sync to all fences of the reservation object which match the sync mode and owner.
+ */
+int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+ struct dma_resv *resv, enum amdgpu_sync_mode mode,
+ void *owner)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *f;
+ int r;
+
+ if (resv == NULL)
+ return -EINVAL;
+ /* Implicitly sync only to KERNEL, WRITE and READ */
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
+ dma_fence_chain_for_each(f, f) {
+ struct dma_fence *tmp = dma_fence_chain_contained(f);
+
+ if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
+ dma_fence_put(f);
+ if (r)
+ return r;
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_sync_kfd - sync to KFD fences
+ *
+ * @sync: sync object to add KFD fences to
+ * @resv: reservation object with KFD fences
+ *
+ * Extract all KFD fences and add them to the sync object.
+ */
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *f;
+ int r = 0;
+
+ dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, f) {
+ void *fence_owner = amdgpu_sync_get_owner(f);
+
+ if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
+ continue;
+
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
+ if (r)
+ break;
+ }
+ dma_resv_iter_end(&cursor);
+
+ return r;
+}
+
+/* Free the entry back to the slab */
+static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
+{
+ hash_del(&e->node);
+ dma_fence_put(e->fence);
+ kmem_cache_free(amdgpu_sync_slab, e);
+}
+
+/**
+ * amdgpu_sync_peek_fence - get the next fence not signaled yet
+ *
+ * @sync: the sync object
+ * @ring: optional ring to use for test
+ *
+ * Returns the next fence not signaled yet without removing it from the sync
+ * object.
+ */
+struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ int i;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ struct dma_fence *f = e->fence;
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+
+ if (dma_fence_is_signaled(f)) {
+ amdgpu_sync_entry_free(e);
+ continue;
+ }
+ if (ring && s_fence) {
+ /* For fences from the same ring it is sufficient
+ * when they are scheduled.
+ */
+ if (s_fence->sched == &ring->sched) {
+ if (dma_fence_is_signaled(&s_fence->scheduled))
+ continue;
+
+ return &s_fence->scheduled;
+ }
+ }
+
+ return f;
+ }
+
+ return NULL;
+}
+
+/**
+ * amdgpu_sync_get_fence - get the next fence from the sync object
+ *
+ * @sync: sync object to use
+ *
+ * Get and remove the next fence from the sync object that is not signaled yet.
+ */
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+
+ f = e->fence;
+
+ hash_del(&e->node);
+ kmem_cache_free(amdgpu_sync_slab, e);
+
+ if (!dma_fence_is_signaled(f))
+ return f;
+
+ dma_fence_put(f);
+ }
+ return NULL;
+}
+
+/**
+ * amdgpu_sync_clone - clone a sync object
+ *
+ * @source: sync object to clone
+ * @clone: pointer to destination sync object
+ *
+ * Adds references to all unsignaled fences in @source to @clone. Also
+ * removes signaled fences from @source while at it.
+ */
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i, r;
+
+ hash_for_each_safe(source->fences, i, tmp, e, node) {
+ f = e->fence;
+ if (!dma_fence_is_signaled(f)) {
+ r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
+ if (r)
+ return r;
+ } else {
+ amdgpu_sync_entry_free(e);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_sync_move - move all fences from src to dst
+ *
+ * @src: source of the fences, empty after function
+ * @dst: destination for the fences
+ *
+ * Moves all fences from source to destination. All fences in destination are
+ * freed and source is empty after the function call.
+ */
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
+{
+ unsigned int i;
+
+ amdgpu_sync_free(dst);
+
+ for (i = 0; i < HASH_SIZE(src->fences); ++i)
+ hlist_move_list(&src->fences[i], &dst->fences[i]);
+}
+
+/**
+ * amdgpu_sync_push_to_job - push fences into job
+ * @sync: sync object to get the fences from
+ * @job: job to push the fences into
+ *
+ * Add all unsignaled fences from sync to job.
+ */
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i, r;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ f = e->fence;
+ if (dma_fence_is_signaled(f)) {
+ amdgpu_sync_entry_free(e);
+ continue;
+ }
+
+ dma_fence_get(f);
+ r = drm_sched_job_add_dependency(&job->base, f);
+ if (r) {
+ dma_fence_put(f);
+ return r;
+ }
+ }
+ return 0;
+}
+
+int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ int i, r;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node) {
+ r = dma_fence_wait(e->fence, intr);
+ if (r)
+ return r;
+
+ amdgpu_sync_entry_free(e);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_sync_free - free the sync object
+ *
+ * @sync: sync object to use
+ *
+ * Free the sync object.
+ */
+void amdgpu_sync_free(struct amdgpu_sync *sync)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ unsigned int i;
+
+ hash_for_each_safe(sync->fences, i, tmp, e, node)
+ amdgpu_sync_entry_free(e);
+}
+
+/**
+ * amdgpu_sync_init - init sync object subsystem
+ *
+ * Allocate the slab allocator.
+ */
+int amdgpu_sync_init(void)
+{
+ amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN);
+ if (!amdgpu_sync_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * amdgpu_sync_fini - fini sync object subsystem
+ *
+ * Free the slab allocator.
+ */
+void amdgpu_sync_fini(void)
+{
+ kmem_cache_destroy(amdgpu_sync_slab);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
new file mode 100644
index 000000000000..51eb4382c91e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_SYNC_H__
+#define __AMDGPU_SYNC_H__
+
+#include <linux/hashtable.h>
+
+struct dma_fence;
+struct dma_resv;
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_job;
+
+enum amdgpu_sync_mode {
+ AMDGPU_SYNC_ALWAYS,
+ AMDGPU_SYNC_NE_OWNER,
+ AMDGPU_SYNC_EQ_OWNER,
+ AMDGPU_SYNC_EXPLICIT
+};
+
+/*
+ * Container for fences used to sync command submissions.
+ */
+struct amdgpu_sync {
+ DECLARE_HASHTABLE(fences, 4);
+};
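+
+/*
+ * Illustrative sketch only (not part of the original patch): a typical
+ * lifetime of an amdgpu_sync container. "adev", "resv", "owner" and "job"
+ * are hypothetical; error handling is omitted:
+ *
+ *	struct amdgpu_sync sync;
+ *
+ *	amdgpu_sync_create(&sync);
+ *	amdgpu_sync_resv(adev, &sync, resv, AMDGPU_SYNC_NE_OWNER, owner);
+ *	amdgpu_sync_push_to_job(&sync, job);
+ *	amdgpu_sync_free(&sync);
+ */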
+
+void amdgpu_sync_create(struct amdgpu_sync *sync);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags);
+int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+ struct dma_resv *resv, enum amdgpu_sync_mode mode,
+ void *owner);
+int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
+struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+ struct amdgpu_ring *ring);
+struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
+int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
+void amdgpu_sync_free(struct amdgpu_sync *sync);
+int amdgpu_sync_init(void);
+void amdgpu_sync_fini(void);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
new file mode 100644
index 000000000000..d13e64a69e25
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -0,0 +1,587 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#if !defined(_AMDGPU_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _AMDGPU_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM amdgpu
+#define TRACE_INCLUDE_FILE amdgpu_trace
+
+#define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
+ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
+
+TRACE_EVENT(amdgpu_device_rreg,
+ TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+ TP_ARGS(did, reg, value),
+ TP_STRUCT__entry(
+ __field(unsigned, did)
+ __field(uint32_t, reg)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->did = did;
+ __entry->reg = reg;
+ __entry->value = value;
+ ),
+ TP_printk("0x%04lx, 0x%08lx, 0x%08lx",
+ (unsigned long)__entry->did,
+ (unsigned long)__entry->reg,
+ (unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_device_wreg,
+ TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+ TP_ARGS(did, reg, value),
+ TP_STRUCT__entry(
+ __field(unsigned, did)
+ __field(uint32_t, reg)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->did = did;
+ __entry->reg = reg;
+ __entry->value = value;
+ ),
+ TP_printk("0x%04lx, 0x%08lx, 0x%08lx",
+ (unsigned long)__entry->did,
+ (unsigned long)__entry->reg,
+ (unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_iv,
+ TP_PROTO(unsigned ih, struct amdgpu_iv_entry *iv),
+ TP_ARGS(ih, iv),
+ TP_STRUCT__entry(
+ __field(unsigned, ih)
+ __field(unsigned, client_id)
+ __field(unsigned, src_id)
+ __field(unsigned, ring_id)
+ __field(unsigned, vmid)
+ __field(unsigned, vmid_src)
+ __field(uint64_t, timestamp)
+ __field(unsigned, timestamp_src)
+ __field(unsigned, pasid)
+ __array(unsigned, src_data, 4)
+ ),
+ TP_fast_assign(
+ __entry->ih = ih;
+ __entry->client_id = iv->client_id;
+ __entry->src_id = iv->src_id;
+ __entry->ring_id = iv->ring_id;
+ __entry->vmid = iv->vmid;
+ __entry->vmid_src = iv->vmid_src;
+ __entry->timestamp = iv->timestamp;
+ __entry->timestamp_src = iv->timestamp_src;
+ __entry->pasid = iv->pasid;
+ __entry->src_data[0] = iv->src_data[0];
+ __entry->src_data[1] = iv->src_data[1];
+ __entry->src_data[2] = iv->src_data[2];
+ __entry->src_data[3] = iv->src_data[3];
+ ),
+ TP_printk("ih:%u client_id:%u src_id:%u ring:%u vmid:%u "
+ "timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
+ __entry->ih, __entry->client_id, __entry->src_id,
+ __entry->ring_id, __entry->vmid,
+ __entry->timestamp, __entry->pasid,
+ __entry->src_data[0], __entry->src_data[1],
+ __entry->src_data[2], __entry->src_data[3])
+);
+
+
+TRACE_EVENT(amdgpu_bo_create,
+ TP_PROTO(struct amdgpu_bo *bo),
+ TP_ARGS(bo),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo *, bo)
+ __field(u32, pages)
+ __field(u32, type)
+ __field(u32, prefer)
+ __field(u32, allow)
+ __field(u32, visible)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo;
+ __entry->pages = PFN_UP(bo->tbo.resource->size);
+ __entry->type = bo->tbo.resource->mem_type;
+ __entry->prefer = bo->preferred_domains;
+ __entry->allow = bo->allowed_domains;
+ __entry->visible = bo->flags;
+ ),
+
+ TP_printk("bo=%p, pages=%u, type=%d, preferred=%d, allowed=%d, visible=%d",
+ __entry->bo, __entry->pages, __entry->type,
+ __entry->prefer, __entry->allow, __entry->visible)
+);
+
+TRACE_EVENT(amdgpu_cs,
+ TP_PROTO(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib),
+ TP_ARGS(p, job, ib),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo_list *, bo_list)
+ __field(u32, ring)
+ __field(u32, dw)
+ __field(u32, fences)
+ ),
+
+ TP_fast_assign(
+ __entry->bo_list = p->bo_list;
+ __entry->ring = to_amdgpu_ring(job->base.entity->rq->sched)->idx;
+ __entry->dw = ib->length_dw;
+ __entry->fences = amdgpu_fence_count_emitted(
+ to_amdgpu_ring(job->base.entity->rq->sched));
+ ),
+ TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
+ __entry->bo_list, __entry->ring, __entry->dw,
+ __entry->fences)
+);
+
+TRACE_EVENT(amdgpu_cs_ioctl,
+ TP_PROTO(struct amdgpu_job *job),
+ TP_ARGS(job),
+ TP_STRUCT__entry(
+ __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __field(u64, context)
+ __field(u64, seqno)
+ __field(struct dma_fence *, fence)
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
+ __field(u32, num_ibs)
+ ),
+
+ TP_fast_assign(
+ __assign_str(timeline);
+ __entry->context = job->base.s_fence->finished.context;
+ __entry->seqno = job->base.s_fence->finished.seqno;
+ __assign_str(ring);
+ __entry->num_ibs = job->num_ibs;
+ ),
+ TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u",
+ __get_str(timeline), __entry->context,
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
+);
+
+TRACE_EVENT(amdgpu_sched_run_job,
+ TP_PROTO(struct amdgpu_job *job),
+ TP_ARGS(job),
+ TP_STRUCT__entry(
+ __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
+ __field(u64, context)
+ __field(u64, seqno)
+ __string(ring, to_amdgpu_ring(job->base.sched)->name)
+ __field(u32, num_ibs)
+ ),
+
+ TP_fast_assign(
+ __assign_str(timeline);
+ __entry->context = job->base.s_fence->finished.context;
+ __entry->seqno = job->base.s_fence->finished.seqno;
+ __assign_str(ring);
+ __entry->num_ibs = job->num_ibs;
+ ),
+ TP_printk("timeline=%s, fence=%llu:%llu, ring_name=%s, num_ibs=%u",
+ __get_str(timeline), __entry->context,
+ __entry->seqno, __get_str(ring), __entry->num_ibs)
+);
+
+
+TRACE_EVENT(amdgpu_vm_grab_id,
+ TP_PROTO(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_job *job),
+ TP_ARGS(vm, ring, job),
+ TP_STRUCT__entry(
+ __field(u32, pasid)
+ __string(ring, ring->name)
+ __field(u32, ring)
+ __field(u32, vmid)
+ __field(u32, vm_hub)
+ __field(u64, pd_addr)
+ __field(u32, needs_flush)
+ ),
+
+ TP_fast_assign(
+ __entry->pasid = vm->pasid;
+ __assign_str(ring);
+ __entry->vmid = job->vmid;
+ __entry->vm_hub = ring->vm_hub,
+ __entry->pd_addr = job->vm_pd_addr;
+ __entry->needs_flush = job->vm_needs_flush;
+ ),
+ TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
+ __entry->pasid, __get_str(ring), __entry->vmid,
+ __entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
+);
+
+TRACE_EVENT(amdgpu_vm_bo_map,
+ TP_PROTO(struct amdgpu_bo_va *bo_va,
+ struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(bo_va, mapping),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo *, bo)
+ __field(long, start)
+ __field(long, last)
+ __field(u64, offset)
+ __field(u64, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo_va ? bo_va->base.bo : NULL;
+ __entry->start = mapping->start;
+ __entry->last = mapping->last;
+ __entry->offset = mapping->offset;
+ __entry->flags = mapping->flags;
+ ),
+ TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
+ __entry->bo, __entry->start, __entry->last,
+ __entry->offset, __entry->flags)
+);
+
+TRACE_EVENT(amdgpu_vm_bo_unmap,
+ TP_PROTO(struct amdgpu_bo_va *bo_va,
+ struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(bo_va, mapping),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo *, bo)
+ __field(long, start)
+ __field(long, last)
+ __field(u64, offset)
+ __field(u64, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo_va ? bo_va->base.bo : NULL;
+ __entry->start = mapping->start;
+ __entry->last = mapping->last;
+ __entry->offset = mapping->offset;
+ __entry->flags = mapping->flags;
+ ),
+ TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx",
+ __entry->bo, __entry->start, __entry->last,
+ __entry->offset, __entry->flags)
+);
+
+DECLARE_EVENT_CLASS(amdgpu_vm_mapping,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping),
+ TP_STRUCT__entry(
+ __field(u64, soffset)
+ __field(u64, eoffset)
+ __field(u64, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->soffset = mapping->start;
+ __entry->eoffset = mapping->last + 1;
+ __entry->flags = mapping->flags;
+ ),
+ TP_printk("soffs=%010llx, eoffs=%010llx, flags=%llx",
+ __entry->soffset, __entry->eoffset, __entry->flags)
+);
+
+DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_update,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping)
+);
+
+DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping)
+);
+
+DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping)
+);
+
+TRACE_EVENT(amdgpu_vm_update_ptes,
+ TP_PROTO(struct amdgpu_vm_update_params *p,
+ uint64_t start, uint64_t end,
+ unsigned int nptes, uint64_t dst,
+ uint64_t incr, uint64_t flags,
+ pid_t pid, uint64_t vm_ctx),
+ TP_ARGS(p, start, end, nptes, dst, incr, flags, pid, vm_ctx),
+ TP_STRUCT__entry(
+ __field(u64, start)
+ __field(u64, end)
+ __field(u64, flags)
+ __field(unsigned int, nptes)
+ __field(u64, incr)
+ __field(pid_t, pid)
+ __field(u64, vm_ctx)
+ __dynamic_array(u64, dst, nptes)
+ ),
+
+ TP_fast_assign(
+ unsigned int i;
+
+ __entry->start = start;
+ __entry->end = end;
+ __entry->flags = flags;
+ __entry->incr = incr;
+ __entry->nptes = nptes;
+ __entry->pid = pid;
+ __entry->vm_ctx = vm_ctx;
+ for (i = 0; i < nptes; ++i) {
+ u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
+ p->pages_addr, dst) : dst;
+
+ ((u64 *)__get_dynamic_array(dst))[i] = addr;
+ dst += incr;
+ }
+ ),
+ TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx,"
+ " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid,
+ __entry->vm_ctx, __entry->start, __entry->end,
+ __entry->flags, __entry->incr, __print_array(
+ __get_dynamic_array(dst), __entry->nptes, 8))
+);
+
+TRACE_EVENT(amdgpu_vm_set_ptes,
+ TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags, bool immediate),
+ TP_ARGS(pe, addr, count, incr, flags, immediate),
+ TP_STRUCT__entry(
+ __field(u64, pe)
+ __field(u64, addr)
+ __field(u32, count)
+ __field(u32, incr)
+ __field(u64, flags)
+ __field(bool, immediate)
+ ),
+
+ TP_fast_assign(
+ __entry->pe = pe;
+ __entry->addr = addr;
+ __entry->count = count;
+ __entry->incr = incr;
+ __entry->flags = flags;
+ __entry->immediate = immediate;
+ ),
+ TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, "
+ "immediate=%d", __entry->pe, __entry->addr, __entry->incr,
+ __entry->flags, __entry->count, __entry->immediate)
+);
+
+TRACE_EVENT(amdgpu_vm_copy_ptes,
+ TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool immediate),
+ TP_ARGS(pe, src, count, immediate),
+ TP_STRUCT__entry(
+ __field(u64, pe)
+ __field(u64, src)
+ __field(u32, count)
+ __field(bool, immediate)
+ ),
+
+ TP_fast_assign(
+ __entry->pe = pe;
+ __entry->src = src;
+ __entry->count = count;
+ __entry->immediate = immediate;
+ ),
+ TP_printk("pe=%010Lx, src=%010Lx, count=%u, immediate=%d",
+ __entry->pe, __entry->src, __entry->count,
+ __entry->immediate)
+);
+
+TRACE_EVENT(amdgpu_vm_flush,
+ TP_PROTO(struct amdgpu_ring *ring, unsigned vmid,
+ uint64_t pd_addr),
+ TP_ARGS(ring, vmid, pd_addr),
+ TP_STRUCT__entry(
+ __string(ring, ring->name)
+ __field(u32, vmid)
+ __field(u32, vm_hub)
+ __field(u64, pd_addr)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ring);
+ __entry->vmid = vmid;
+ __entry->vm_hub = ring->vm_hub;
+ __entry->pd_addr = pd_addr;
+ ),
+ TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
+ __get_str(ring), __entry->vmid,
+ __entry->vm_hub, __entry->pd_addr)
+);
+
+DECLARE_EVENT_CLASS(amdgpu_pasid,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid),
+ TP_STRUCT__entry(
+ __field(unsigned, pasid)
+ ),
+ TP_fast_assign(
+ __entry->pasid = pasid;
+ ),
+ TP_printk("pasid=%u", __entry->pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_allocated,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid)
+);
+
+TRACE_EVENT(amdgpu_isolation,
+ TP_PROTO(void *prev, void *next),
+ TP_ARGS(prev, next),
+ TP_STRUCT__entry(
+ __field(void *, prev)
+ __field(void *, next)
+ ),
+
+ TP_fast_assign(
+ __entry->prev = prev;
+ __entry->next = next;
+ ),
+ TP_printk("prev=%p, next=%p",
+ __entry->prev,
+ __entry->next)
+);
+
+TRACE_EVENT(amdgpu_cleaner_shader,
+ TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence),
+ TP_ARGS(ring, fence),
+ TP_STRUCT__entry(
+ __string(ring, ring->name)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ring);
+ __entry->seqno = fence->seqno;
+ ),
+ TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno)
+);
+
+TRACE_EVENT(amdgpu_bo_list_set,
+ TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
+ TP_ARGS(list, bo),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo_list *, list)
+ __field(struct amdgpu_bo *, bo)
+ __field(u64, bo_size)
+ ),
+
+ TP_fast_assign(
+ __entry->list = list;
+ __entry->bo = bo;
+ __entry->bo_size = amdgpu_bo_size(bo);
+ ),
+ TP_printk("list=%p, bo=%p, bo_size=%Ld",
+ __entry->list,
+ __entry->bo,
+ __entry->bo_size)
+);
+
+TRACE_EVENT(amdgpu_cs_bo_status,
+ TP_PROTO(uint64_t total_bo, uint64_t total_size),
+ TP_ARGS(total_bo, total_size),
+ TP_STRUCT__entry(
+ __field(u64, total_bo)
+ __field(u64, total_size)
+ ),
+
+ TP_fast_assign(
+ __entry->total_bo = total_bo;
+ __entry->total_size = total_size;
+ ),
+ TP_printk("total_bo_size=%Ld, total_bo_count=%Ld",
+ __entry->total_bo, __entry->total_size)
+);
+
+TRACE_EVENT(amdgpu_bo_move,
+ TP_PROTO(struct amdgpu_bo *bo, uint32_t new_placement, uint32_t old_placement),
+ TP_ARGS(bo, new_placement, old_placement),
+ TP_STRUCT__entry(
+ __field(struct amdgpu_bo *, bo)
+ __field(u64, bo_size)
+ __field(u32, new_placement)
+ __field(u32, old_placement)
+ ),
+
+ TP_fast_assign(
+ __entry->bo = bo;
+ __entry->bo_size = amdgpu_bo_size(bo);
+ __entry->new_placement = new_placement;
+ __entry->old_placement = old_placement;
+ ),
+ TP_printk("bo=%p, from=%d, to=%d, size=%Ld",
+ __entry->bo, __entry->old_placement,
+ __entry->new_placement, __entry->bo_size)
+);
+
+TRACE_EVENT(amdgpu_ib_pipe_sync,
+ TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
+ TP_ARGS(sched_job, fence),
+ TP_STRUCT__entry(
+ __string(ring, sched_job->base.sched->name)
+ __field(struct dma_fence *, fence)
+ __field(u64, ctx)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ring);
+ __entry->fence = fence;
+ __entry->ctx = fence->context;
+ __entry->seqno = fence->seqno;
+ ),
+ TP_printk("job ring=%s need pipe sync to fence=%llu:%llu",
+ __get_str(ring), __entry->ctx, __entry->seqno)
+);
+
+TRACE_EVENT(amdgpu_reset_reg_dumps,
+ TP_PROTO(uint32_t address, uint32_t value),
+ TP_ARGS(address, value),
+ TP_STRUCT__entry(
+ __field(uint32_t, address)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->value = value;
+ ),
+ TP_printk("amdgpu register dump 0x%x: 0x%x",
+ __entry->address,
+ __entry->value)
+);
+
+#undef AMDGPU_JOB_GET_TIMELINE_NAME
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/amd/amdgpu
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
new file mode 100644
index 000000000000..b96d885f6e33
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MIT
+/* Copyright Red Hat Inc 2010.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author : Dave Airlie <airlied@redhat.com>
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu_cs.h"
+#include "amdgpu.h"
+
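+/*
+ * CREATE_TRACE_POINTS must be defined in exactly one compilation unit;
+ * defining it here before including amdgpu_trace.h emits the tracepoint
+ * definitions for all events declared in that header.
+ */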
+#define CREATE_TRACE_POINTS
+#include "amdgpu_trace.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
new file mode 100644
index 000000000000..2b931e855abd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -0,0 +1,2721 @@
+/*
+ * Copyright 2009 Jerome Glisse.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Jerome Glisse <glisse@freedesktop.org>
+ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
+ * Dave Airlie
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/pagemap.h>
+#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/dma-buf.h>
+#include <linux/sizes.h>
+#include <linux/module.h>
+
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_object.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_sdma.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_atomfirmware.h"
+#include "amdgpu_res_cursor.h"
+#include "bif/bif_4_1_d.h"
+
+MODULE_IMPORT_NS("DMA_BUF");
+
+#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
+
+static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_resource *bo_mem);
+static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
+ struct ttm_tt *ttm);
+
+static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
+ unsigned int type,
+ uint64_t size_in_page)
+{
+ return ttm_range_man_init(&adev->mman.bdev, type,
+ false, size_in_page);
+}
+
+/**
+ * amdgpu_evict_flags - Compute placement flags
+ *
+ * @bo: The buffer object to evict
+ * @placement: Possible destination(s) for evicted BO
+ *
+ * Fill in placement data when ttm_bo_evict() is called
+ */
+static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
+ struct ttm_placement *placement)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo;
+ static const struct ttm_place placements = {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = TTM_PL_SYSTEM,
+ .flags = 0
+ };
+
+ /* Don't handle scatter gather BOs */
+ if (bo->type == ttm_bo_type_sg) {
+ placement->num_placement = 0;
+ return;
+ }
+
+ /* Object isn't an AMDGPU object so ignore */
+ if (!amdgpu_bo_is_amdgpu_bo(bo)) {
+ placement->placement = &placements;
+ placement->num_placement = 1;
+ return;
+ }
+
+ abo = ttm_to_amdgpu_bo(bo);
+ if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
+ placement->num_placement = 0;
+ return;
+ }
+
+ switch (bo->resource->mem_type) {
+ case AMDGPU_PL_GDS:
+ case AMDGPU_PL_GWS:
+ case AMDGPU_PL_OA:
+ case AMDGPU_PL_DOORBELL:
+ case AMDGPU_PL_MMIO_REMAP:
+ placement->num_placement = 0;
+ return;
+
+ case TTM_PL_VRAM:
+ if (!adev->mman.buffer_funcs_enabled) {
+ /* Move to system memory */
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+
+ } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
+
+ /* Try evicting to the CPU inaccessible part of VRAM
+ * first, but only set GTT as busy placement, so this
+ * BO will be evicted to GTT rather than causing other
+ * BOs to be evicted from VRAM
+ */
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
+ abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+ abo->placements[0].lpfn = 0;
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
+ } else {
+ /* Move to GTT memory */
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
+ }
+ break;
+ case TTM_PL_TT:
+ case AMDGPU_PL_PREEMPT:
+ default:
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+ break;
+ }
+ *placement = abo->placement;
+}
+
+/**
+ * amdgpu_ttm_map_buffer - Map memory into the GART windows
+ * @bo: buffer object to map
+ * @mem: memory object to map
+ * @mm_cur: range to map
+ * @window: which GART window to use
+ * @ring: DMA ring to use for the copy
+ * @tmz: if we should setup a TMZ enabled mapping
+ * @size: in number of bytes to map, out number of bytes mapped
+ * @addr: resulting address inside the MC address space
+ *
+ * Set up one of the GART windows to access a specific piece of memory or return
+ * the physical address for local memory.
+ */
+static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
+ struct ttm_resource *mem,
+ struct amdgpu_res_cursor *mm_cur,
+ unsigned int window, struct amdgpu_ring *ring,
+ bool tmz, uint64_t *size, uint64_t *addr)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int offset, num_pages, num_dw, num_bytes;
+ uint64_t src_addr, dst_addr;
+ struct amdgpu_job *job;
+ void *cpu_addr;
+ uint64_t flags;
+ int r;
+
+ BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
+ AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+
+ if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT))
+ return -EINVAL;
+
+ /* Map only what can't be accessed directly */
+ if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
+ *addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
+ mm_cur->start;
+ return 0;
+ }
+
+ /*
+ * If start begins at an offset inside the page, then adjust the size
+ * and addr accordingly
+ */
+ offset = mm_cur->start & ~PAGE_MASK;
+
+ num_pages = PFN_UP(*size + offset);
+ num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE);
+
+ *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
+
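+ /*
+ * Each GART window is AMDGPU_GTT_MAX_TRANSFER_SIZE GPU pages wide, so
+ * window N starts at gart_start + N * window size; keep the intra-page
+ * offset so the copy sees the same alignment as the source.
+ */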
+ *addr = adev->gmc.gart_start;
+ *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE;
+ *addr += offset;
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_MAP_BUFFER);
+ if (r)
+ return r;
+
+ src_addr = num_dw * 4;
+ src_addr += job->ibs[0].gpu_addr;
+
+ dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+ dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+ dst_addr, num_bytes, 0);
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
+ if (tmz)
+ flags |= AMDGPU_PTE_TMZ;
+
+ cpu_addr = &job->ibs[0].ptr[num_dw];
+
+ if (mem->mem_type == TTM_PL_TT) {
+ dma_addr_t *dma_addr;
+
+ dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
+ amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr);
+ } else {
+ u64 pa = mm_cur->start + adev->vm_manager.vram_base_offset;
+
+ amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr);
+ }
+
+ dma_fence_put(amdgpu_job_submit(job));
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ * @adev: amdgpu device
+ * @src: buffer/address where to read from
+ * @dst: buffer/address where to write to
+ * @size: number of bytes to copy
+ * @tmz: if a secure copy should be used
+ * @resv: resv object to sync to
+ * @f: Returns the last fence if multiple jobs are submitted.
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo may be the same BO for a
+ * move, or different BOs for a BO-to-BO copy.
+ *
+ */
+__attribute__((nonnull))
+static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ const struct amdgpu_copy_mem *src,
+ const struct amdgpu_copy_mem *dst,
+ uint64_t size, bool tmz,
+ struct dma_resv *resv,
+ struct dma_fence **f)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor src_mm, dst_mm;
+ struct dma_fence *fence = NULL;
+ int r = 0;
+ uint32_t copy_flags = 0;
+ struct amdgpu_bo *abo_src, *abo_dst;
+
+ if (!adev->mman.buffer_funcs_enabled) {
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_res_first(src->mem, src->offset, size, &src_mm);
+ amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (src_mm.remaining) {
+ uint64_t from, to, cur_size, tiling_flags;
+ uint32_t num_type, data_format, max_com, write_compress_disable;
+ struct dma_fence *next;
+
+ /* Never copy more than 256MiB at once to avoid a timeout */
+ cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20);
+
+ /* Map src to window 0 and dst to window 1. */
+ r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
+ 0, ring, tmz, &cur_size, &from);
+ if (r)
+ goto error;
+
+ r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
+ 1, ring, tmz, &cur_size, &to);
+ if (r)
+ goto error;
+
+ abo_src = ttm_to_amdgpu_bo(src->bo);
+ abo_dst = ttm_to_amdgpu_bo(dst->bo);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
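+ /*
+ * GFX12 DCC-compressed BOs in VRAM: have SDMA decompress on the read
+ * side and re-compress on the write side, using the compression
+ * parameters taken from the destination BO's tiling flags.
+ */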
+ if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
+ copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED;
+ if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (dst->mem->mem_type == TTM_PL_VRAM)) {
+ copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED;
+ amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags);
+ max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+ num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
+ data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+ write_compress_disable =
+ AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
+ copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
+ AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
+ AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) |
+ AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE,
+ write_compress_disable));
+ }
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
+ if (r)
+ goto error;
+
+ dma_fence_put(fence);
+ fence = next;
+
+ amdgpu_res_next(&src_mm, cur_size);
+ amdgpu_res_next(&dst_mm, cur_size);
+ }
+error:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ *f = fence;
+ return r;
+}
+
+/*
+ * amdgpu_move_blit - Copy an entire buffer to another buffer
+ *
+ * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
+ * help move buffers to and from VRAM.
+ */
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+ bool evict,
+ struct ttm_resource *new_mem,
+ struct ttm_resource *old_mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_copy_mem src, dst;
+ struct dma_fence *fence = NULL;
+ int r;
+
+ src.bo = bo;
+ dst.bo = bo;
+ src.mem = old_mem;
+ dst.mem = new_mem;
+ src.offset = 0;
+ dst.offset = 0;
+
+ r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+ new_mem->size,
+ amdgpu_bo_encrypted(abo),
+ bo->base.resv, &fence);
+ if (r)
+ goto error;
+
+ /* clear the space being freed */
+ if (old_mem->mem_type == TTM_PL_VRAM &&
+ (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
+ struct dma_fence *wipe_fence = NULL;
+
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT);
+ if (r) {
+ goto error;
+ } else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
+ dma_fence_put(fence);
+ fence = wipe_fence;
+ }
+ }
+
+ /* Always block for VM page tables before committing the new location */
+ if (bo->type == ttm_bo_type_kernel)
+ r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
+ else
+ r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
+ dma_fence_put(fence);
+ return r;
+
+error:
+ if (fence)
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ return r;
+}
+
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
+ *
+ * Returns: true if the full resource is CPU visible, false otherwise.
+ */
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
+{
+ struct amdgpu_res_cursor cursor;
+
+ if (!res)
+ return false;
+
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL ||
+ res->mem_type == AMDGPU_PL_MMIO_REMAP)
+ return true;
+
+ if (res->mem_type != TTM_PL_VRAM)
+ return false;
+
+ amdgpu_res_first(res, 0, res->size, &cursor);
+ while (cursor.remaining) {
+ if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
+ return false;
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+
+ return true;
+}
+
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
+ return false;
+
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
+
+ return true;
+}
+
+/*
+ * amdgpu_bo_move - Move a buffer object to a new memory location
+ *
+ * Called by ttm_bo_handle_move_mem()
+ */
+static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *new_mem,
+ struct ttm_place *hop)
+{
+ struct amdgpu_device *adev;
+ struct amdgpu_bo *abo;
+ struct ttm_resource *old_mem = bo->resource;
+ int r;
+
+ if (new_mem->mem_type == TTM_PL_TT ||
+ new_mem->mem_type == AMDGPU_PL_PREEMPT) {
+ r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
+ if (r)
+ return r;
+ }
+
+ abo = ttm_to_amdgpu_bo(bo);
+ adev = amdgpu_ttm_adev(bo->bdev);
+
+ if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
+ bo->ttm == NULL)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ ttm_bo_move_null(bo, new_mem);
+ return 0;
+ }
+ if (old_mem->mem_type == TTM_PL_SYSTEM &&
+ (new_mem->mem_type == TTM_PL_TT ||
+ new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ ttm_bo_move_null(bo, new_mem);
+ return 0;
+ }
+ if ((old_mem->mem_type == TTM_PL_TT ||
+ old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
+ new_mem->mem_type == TTM_PL_SYSTEM) {
+ r = ttm_bo_wait_ctx(bo, ctx);
+ if (r)
+ return r;
+
+ amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ ttm_resource_free(bo, &bo->resource);
+ ttm_bo_assign_mem(bo, new_mem);
+ return 0;
+ }
+
+ if (old_mem->mem_type == AMDGPU_PL_GDS ||
+ old_mem->mem_type == AMDGPU_PL_GWS ||
+ old_mem->mem_type == AMDGPU_PL_OA ||
+ old_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ old_mem->mem_type == AMDGPU_PL_MMIO_REMAP ||
+ new_mem->mem_type == AMDGPU_PL_GDS ||
+ new_mem->mem_type == AMDGPU_PL_GWS ||
+ new_mem->mem_type == AMDGPU_PL_OA ||
+ new_mem->mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem->mem_type == AMDGPU_PL_MMIO_REMAP) {
+ /* Nothing to save here */
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ ttm_bo_move_null(bo, new_mem);
+ return 0;
+ }
+
+ if (bo->type == ttm_bo_type_device &&
+ new_mem->mem_type == TTM_PL_VRAM &&
+ old_mem->mem_type != TTM_PL_VRAM) {
+ /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
+ * accesses the BO after it's moved.
+ */
+ abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ }
+
+ if (adev->mman.buffer_funcs_enabled &&
+ ((old_mem->mem_type == TTM_PL_SYSTEM &&
+ new_mem->mem_type == TTM_PL_VRAM) ||
+ (old_mem->mem_type == TTM_PL_VRAM &&
+ new_mem->mem_type == TTM_PL_SYSTEM))) {
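+ /*
+ * System pages are not GPU-accessible until they are bound into
+ * GTT, so returning -EMULTIHOP asks TTM to redo the move through
+ * the temporary GTT placement described by @hop.
+ */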
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = TTM_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ return -EMULTIHOP;
+ }
+
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ if (adev->mman.buffer_funcs_enabled)
+ r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
+ else
+ r = -ENODEV;
+
+ if (r) {
+ /* Check that all memory is CPU accessible */
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
+ pr_err("Move buffer fallback to memcpy unavailable\n");
+ return r;
+ }
+
+ r = ttm_bo_move_memcpy(bo, ctx, new_mem);
+ if (r)
+ return r;
+ }
+
+ /* update statistics after the move */
+ if (evict)
+ atomic64_inc(&adev->num_evictions);
+ atomic64_add(bo->base.size, &adev->num_bytes_moved);
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
+ *
+ * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
+ */
+static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
+ struct ttm_resource *mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+
+ switch (mem->mem_type) {
+ case TTM_PL_SYSTEM:
+ /* system memory */
+ return 0;
+ case TTM_PL_TT:
+ case AMDGPU_PL_PREEMPT:
+ break;
+ case TTM_PL_VRAM:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+
+ if (adev->mman.aper_base_kaddr &&
+ mem->placement & TTM_PL_FLAG_CONTIGUOUS)
+ mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+ mem->bus.offset;
+
+ mem->bus.offset += adev->gmc.aper_base;
+ mem->bus.is_iomem = true;
+ break;
+ case AMDGPU_PL_DOORBELL:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->doorbell.base;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
+ case AMDGPU_PL_MMIO_REMAP:
+ mem->bus.offset = mem->start << PAGE_SHIFT;
+ mem->bus.offset += adev->rmmio_remap.bus_addr;
+ mem->bus.is_iomem = true;
+ mem->bus.caching = ttm_uncached;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
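+/*
+ * amdgpu_ttm_io_mem_pfn - Translate a page offset inside a BO into a host PFN
+ * for CPU mappings. Doorbell and MMIO-remap BOs are backed by their own BARs,
+ * everything else handled here is backed by the VRAM aperture.
+ */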
+static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+ unsigned long page_offset)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_res_cursor cursor;
+
+ amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
+ &cursor);
+
+ if (bo->resource->mem_type == AMDGPU_PL_DOORBELL)
+ return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT;
+ else if (bo->resource->mem_type == AMDGPU_PL_MMIO_REMAP)
+ return ((uint64_t)(adev->rmmio_remap.bus_addr + cursor.start)) >> PAGE_SHIFT;
+
+ return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
+}
+
+/**
+ * amdgpu_ttm_domain_start - Returns GPU start address
+ * @adev: amdgpu device object
+ * @type: type of the memory
+ *
+ * Returns:
+ * GPU start address of a memory domain
+ */
+
+uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
+{
+ switch (type) {
+ case TTM_PL_TT:
+ return adev->gmc.gart_start;
+ case TTM_PL_VRAM:
+ return adev->gmc.vram_start;
+ }
+
+ return 0;
+}
+
+/*
+ * TTM backend functions.
+ */
+struct amdgpu_ttm_tt {
+ struct ttm_tt ttm;
+ struct drm_gem_object *gobj;
+ u64 offset;
+ uint64_t userptr;
+ struct task_struct *usertask;
+ uint32_t userflags;
+ bool bound;
+ int32_t pool_id;
+};
+
+#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
+
+#ifdef CONFIG_DRM_AMDGPU_USERPTR
+/*
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
+ *
+ * The calling function must call amdgpu_ttm_tt_userptr_range_done() once and
+ * only once afterwards to stop HMM tracking. It is the caller's responsibility
+ * to ensure that the range points to valid memory and that it is freed as well.
+ */
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct amdgpu_hmm_range *range)
+{
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ unsigned long start = gtt->userptr;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ bool readonly;
+ int r = 0;
+
+ mm = bo->notifier.mm;
+ if (unlikely(!mm)) {
+ DRM_DEBUG_DRIVER("BO is not registered?\n");
+ return -EFAULT;
+ }
+
+ if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+ return -ESRCH;
+
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, start);
+ if (unlikely(!vma)) {
+ r = -EFAULT;
+ goto out_unlock;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out_unlock;
+ }
+
+ readonly = amdgpu_ttm_tt_is_readonly(ttm);
+ r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
+ readonly, NULL, range);
+out_unlock:
+ mmap_read_unlock(mm);
+ if (r)
+ pr_debug("failed %d to get user pages 0x%lx\n", r, start);
+
+ mmput(mm);
+
+ return r;
+}
+
+#endif
+
+/*
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ *
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
+ */
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range)
+{
+ unsigned long i;
+
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i] = range ? hmm_pfn_to_page(range->hmm_range.hmm_pfns[i]) : NULL;
+}
+
+/*
+ * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
+ *
+ * Called by amdgpu_ttm_backend_bind()
+ */
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+ enum dma_data_direction direction = write ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ int r;
+
+ /* Allocate an SG array and squash pages into it */
+ r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
+ (u64)ttm->num_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (r)
+ goto release_sg;
+
+ /* Map SG to device */
+ r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+ if (r)
+ goto release_sg_table;
+
+ /* convert SG to linear array of pages and dma addresses */
+ drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
+ ttm->num_pages);
+
+ return 0;
+
+release_sg_table:
+ sg_free_table(ttm->sg);
+release_sg:
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ return r;
+}
+
+/*
+ * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
+ */
+static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+ enum dma_data_direction direction = write ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+ /* double check that we don't free the table twice */
+ if (!ttm->sg || !ttm->sg->sgl)
+ return;
+
+ /* unmap the pages mapped to the device */
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
+ sg_free_table(ttm->sg);
+}
+
+/*
+ * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ...
+ * MQDn+CtrlStackn where n is the number of XCCs per partition.
+ * pages_per_xcc is the size of one MQD+CtrlStack. The first page is the MQD
+ * and uses the default memory type, UC. The remaining pages_per_xcc - 1 pages
+ * are the Ctrl stack and have their memory type changed to NC.
+ */
+static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
+ struct ttm_tt *ttm, uint64_t flags)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ uint64_t total_pages = ttm->num_pages;
+ int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
+ uint64_t page_idx, pages_per_xcc;
+ int i;
+ uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
+
+ pages_per_xcc = total_pages;
+ do_div(pages_per_xcc, num_xcc);
+
+ for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
+ /* MQD page: use default flags */
+ amdgpu_gart_bind(adev,
+ gtt->offset + (page_idx << PAGE_SHIFT),
+ 1, &gtt->ttm.dma_address[page_idx], flags);
+ /*
+ * Ctrl pages - modify the memory type to NC (ctrl_flags) from
+ * the second page of the BO onward.
+ */
+ amdgpu_gart_bind(adev,
+ gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
+ pages_per_xcc - 1,
+ &gtt->ttm.dma_address[page_idx + 1],
+ ctrl_flags);
+ }
+}
+
+static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+ struct ttm_buffer_object *tbo,
+ uint64_t flags)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+ struct ttm_tt *ttm = tbo->ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (amdgpu_bo_encrypted(abo))
+ flags |= AMDGPU_PTE_TMZ;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
+ amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
+ } else {
+ amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ gtt->ttm.dma_address, flags);
+ }
+ gtt->bound = true;
+}
+
+/*
+ * amdgpu_ttm_backend_bind - Bind GTT memory
+ *
+ * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
+ * This handles binding GTT memory to the device address space.
+ */
+static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_resource *bo_mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ uint64_t flags;
+ int r;
+
+ if (!bo_mem)
+ return -EINVAL;
+
+ if (gtt->bound)
+ return 0;
+
+ if (gtt->userptr) {
+ r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
+ if (r) {
+ dev_err(adev->dev, "failed to pin userptr\n");
+ return r;
+ }
+ } else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
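+ /*
+ * BO imported through dma-buf: map the exporter's backing store
+ * via the attachment on first bind and cache the sg table in
+ * ttm->sg.
+ */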
+ if (!ttm->sg) {
+ struct dma_buf_attachment *attach;
+ struct sg_table *sgt;
+
+ attach = gtt->gobj->import_attach;
+ sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ ttm->sg = sgt;
+ }
+
+ drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
+ ttm->num_pages);
+ }
+
+ if (!ttm->num_pages) {
+ WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
+ ttm->num_pages, bo_mem, ttm);
+ }
+
+ if (bo_mem->mem_type != TTM_PL_TT ||
+ !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
+ gtt->offset = AMDGPU_BO_INVALID_OFFSET;
+ return 0;
+ }
+
+ /* compute PTE flags relevant to this BO memory */
+ flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
+
+ /* bind pages into GART page tables */
+ gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
+ amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+ gtt->ttm.dma_address, flags);
+ gtt->bound = true;
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
+ * through AGP or GART aperture.
+ *
+ * If bo is accessible through AGP aperture, then use AGP aperture
+ * to access bo; otherwise allocate logical space in GART aperture
+ * and map bo to GART aperture.
+ */
+int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
+ struct ttm_placement placement;
+ struct ttm_place placements;
+ struct ttm_resource *tmp;
+ uint64_t addr, flags;
+ int r;
+
+ if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
+ return 0;
+
+ addr = amdgpu_gmc_agp_addr(bo);
+ if (addr != AMDGPU_BO_INVALID_OFFSET)
+ return 0;
+
+ /* allocate GART space */
+ placement.num_placement = 1;
+ placement.placement = &placements;
+ placements.fpfn = 0;
+ placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
+ placements.mem_type = TTM_PL_TT;
+ placements.flags = bo->resource->placement;
+
+ r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
+ if (unlikely(r))
+ return r;
+
+ /* compute PTE flags for this buffer object */
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
+
+ /* Bind pages */
+ gtt->offset = (u64)tmp->start << PAGE_SHIFT;
+ amdgpu_ttm_gart_bind(adev, bo, flags);
+ amdgpu_gart_invalidate_tlb(adev);
+ ttm_resource_free(bo, &bo->resource);
+ ttm_bo_assign_mem(bo, tmp);
+
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_recover_gart - Rebind GTT pages
+ *
+ * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
+ * rebind GTT pages during a GPU reset.
+ */
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
+ uint64_t flags;
+
+ if (!tbo->ttm)
+ return;
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
+ amdgpu_ttm_gart_bind(adev, tbo, flags);
+}
+
+/*
+ * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
+ *
+ * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
+ * ttm_tt_destroy().
+ */
+static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ /* if the pages have userptr pinning then clear that first */
+ if (gtt->userptr) {
+ amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
+ } else if (ttm->sg && drm_gem_is_imported(gtt->gobj)) {
+ struct dma_buf_attachment *attach;
+
+ attach = gtt->gobj->import_attach;
+ dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+ ttm->sg = NULL;
+ }
+
+ if (!gtt->bound)
+ return;
+
+ if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
+ return;
+
+ /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
+ amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
+ gtt->bound = false;
+}
+
+static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+
+ ttm_tt_fini(&gtt->ttm);
+ kfree(gtt);
+}
+
+/**
+ * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
+ *
+ * @bo: The buffer object to create a GTT ttm_tt object around
+ * @page_flags: Page flags to be added to the ttm_tt object
+ *
+ * Called by ttm_tt_create().
+ */
+static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
+ uint32_t page_flags)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_ttm_tt *gtt;
+ enum ttm_caching caching;
+
+ gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
+ if (!gtt)
+ return NULL;
+
+ gtt->gobj = &bo->base;
+ if (adev->gmc.mem_partitions && abo->xcp_id >= 0)
+ gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id);
+ else
+ gtt->pool_id = abo->xcp_id;
+
+ if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+ caching = ttm_write_combined;
+ else
+ caching = ttm_cached;
+
+ /* allocate space for the uninitialized page entries */
+ if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
+ kfree(gtt);
+ return NULL;
+ }
+ return &gtt->ttm;
+}
+
+/*
+ * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
+ *
+ * Map the pages of a ttm_tt object to an address space visible
+ * to the underlying device.
+ */
+static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ struct ttm_pool *pool;
+ pgoff_t i;
+ int ret;
+
+ /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
+ if (gtt->userptr) {
+ ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!ttm->sg)
+ return -ENOMEM;
+ return 0;
+ }
+
+ if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return 0;
+
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+ ret = ttm_pool_alloc(pool, ttm, ctx);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i]->mapping = bdev->dev_mapping;
+
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
+ *
+ * Unmaps pages of a ttm_tt object from the device address space and
+ * unpopulates the page array backing it.
+ */
+static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
+ struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ struct amdgpu_device *adev;
+ struct ttm_pool *pool;
+ pgoff_t i;
+
+ amdgpu_ttm_backend_unbind(bdev, ttm);
+
+ if (gtt->userptr) {
+ amdgpu_ttm_tt_set_user_pages(ttm, NULL);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ return;
+ }
+
+ if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return;
+
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i]->mapping = NULL;
+
+ adev = amdgpu_ttm_adev(bdev);
+
+ if (adev->mman.ttm_pools && gtt->pool_id >= 0)
+ pool = &adev->mman.ttm_pools[gtt->pool_id];
+ else
+ pool = &adev->mman.bdev.pool;
+
+ return ttm_pool_free(pool, ttm);
+}
+
+/**
+ * amdgpu_ttm_tt_get_userptr - Return the userptr address backing a GTT
+ * ttm_tt object
+ *
+ * @tbo: The ttm_buffer_object that contains the userptr
+ * @user_addr: Returns the userptr address
+ */
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr)
+{
+ struct amdgpu_ttm_tt *gtt;
+
+ if (!tbo->ttm)
+ return -EINVAL;
+
+ gtt = (void *)tbo->ttm;
+ *user_addr = gtt->userptr;
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
+ * task
+ *
+ * @bo: The ttm_buffer_object to bind this userptr to
+ * @addr: The address in the current tasks VM space to use
+ * @flags: Requirements of userptr object.
+ *
+ * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
+ * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
+ * initialize GPU VM for a KFD process.
+ */
+int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
+ uint64_t addr, uint32_t flags)
+{
+ struct amdgpu_ttm_tt *gtt;
+
+ if (!bo->ttm) {
+ /* TODO: We want a separate TTM object type for userptrs */
+ bo->ttm = amdgpu_ttm_tt_create(bo, 0);
+ if (bo->ttm == NULL)
+ return -ENOMEM;
+ }
+
+ /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
+ bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
+
+ gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
+ gtt->userptr = addr;
+ gtt->userflags = flags;
+
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+ gtt->usertask = current->group_leader;
+ get_task_struct(gtt->usertask);
+
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
+ */
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (gtt == NULL)
+ return NULL;
+
+ if (gtt->usertask == NULL)
+ return NULL;
+
+ return gtt->usertask->mm;
+}
+
+/*
+ * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies inside an
+ * address range for the current task.
+ *
+ */
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+ unsigned long end, unsigned long *userptr)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+ unsigned long size;
+
+ if (gtt == NULL || !gtt->userptr)
+ return false;
+
+ /* Return false if no part of the ttm_tt object lies within
+ * the range
+ */
+ size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
+ if (gtt->userptr > end || gtt->userptr + size <= start)
+ return false;
+
+ if (userptr)
+ *userptr = gtt->userptr;
+ return true;
+}
+
+/*
+ * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
+ */
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (gtt == NULL || !gtt->userptr)
+ return false;
+
+ return true;
+}
+
+/*
+ * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
+ */
+bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
+
+ if (gtt == NULL)
+ return false;
+
+ return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+}
+
+/**
+ * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
+ *
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PDE (Page Directory Entry).
+ */
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
+{
+ uint64_t flags = 0;
+
+ if (mem && mem->mem_type != TTM_PL_SYSTEM)
+ flags |= AMDGPU_PTE_VALID;
+
+ if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_DOORBELL ||
+ mem->mem_type == AMDGPU_PL_PREEMPT ||
+ mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
+ flags |= AMDGPU_PTE_SYSTEM;
+
+ if (ttm && ttm->caching == ttm_cached)
+ flags |= AMDGPU_PTE_SNOOPED;
+ }
+
+ if (mem && mem->mem_type == TTM_PL_VRAM &&
+ mem->bus.caching == ttm_cached)
+ flags |= AMDGPU_PTE_SNOOPED;
+
+ return flags;
+}
+
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @adev: amdgpu_device pointer
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PTE (Page Table Entry).
+ */
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+ struct ttm_resource *mem)
+{
+ uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
+
+ flags |= adev->gart.gart_pte_flags;
+ flags |= AMDGPU_PTE_READABLE;
+
+ if (!amdgpu_ttm_tt_is_readonly(ttm))
+ flags |= AMDGPU_PTE_WRITEABLE;
+
+ return flags;
+}
+
+/*
+ * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
+ * object.
+ *
+ * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
+ * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
+ * it can find space for a new object and by ttm_bo_force_list_clean() which is
+ * used to clean out a memory space.
+ */
+static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
+ const struct ttm_place *place)
+{
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *f;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return ttm_bo_eviction_valuable(bo, place);
+
+ /* Swapout? */
+ if (bo->resource->mem_type == TTM_PL_SYSTEM)
+ return true;
+
+ if (bo->type == ttm_bo_type_kernel &&
+ !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
+ return false;
+
+ /* If bo is a KFD BO, check if the bo belongs to the current process.
+ * If true, then return false as any KFD process needs all its BOs to
+ * be resident to run successfully
+ */
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP, f) {
+ if (amdkfd_fence_check_mm(f, current->mm) &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
+ }
+
+ /* Preemptible BOs don't own system resources managed by the
+ * driver (pages, VRAM, GART space). They point to resources
+ * owned by someone else (e.g. pageable memory in user mode
+ * or a DMABuf). They are used in a preemptible context so we
+ * can guarantee no deadlocks and good QoS in case of MMU
+ * notifiers or DMABuf move notifiers from the resource owner.
+ */
+ if (bo->resource->mem_type == AMDGPU_PL_PREEMPT)
+ return false;
+
+ if (bo->resource->mem_type == TTM_PL_TT &&
+ amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
+ return false;
+
+ return ttm_bo_eviction_valuable(bo, place);
+}
+
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
+ void *buf, size_t size, bool write)
+{
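+ /*
+ * Access VRAM through amdgpu_device_mm_access() one aligned dword at a
+ * time; unaligned or partial accesses read the containing dword and,
+ * for writes, merge the new bytes back in before writing it out.
+ */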
+ while (size) {
+ uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+ uint64_t bytes = 4 - (pos & 0x3);
+ uint32_t shift = (pos & 0x3) * 8;
+ uint32_t mask = 0xffffffff << shift;
+ uint32_t value = 0;
+
+ if (size < bytes) {
+ mask &= 0xffffffff >> (bytes - size) * 8;
+ bytes = size;
+ }
+
+ if (mask != 0xffffffff) {
+ amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
+ if (write) {
+ value &= ~mask;
+ value |= (*(uint32_t *)buf << shift) & mask;
+ amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
+ } else {
+ value = (value & mask) >> shift;
+ memcpy(buf, &value, bytes);
+ }
+ } else {
+ amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
+ }
+
+ pos += bytes;
+ buf += bytes;
+ size -= bytes;
+ }
+}
+
+static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf,
+ int len, int write)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct amdgpu_res_cursor src_mm;
+ struct amdgpu_job *job;
+ struct dma_fence *fence;
+ uint64_t src_addr, dst_addr;
+ unsigned int num_dw;
+ int r, idx;
+
+ if (len != PAGE_SIZE)
+ return -EINVAL;
+
+ if (!adev->mman.sdma_access_ptr)
+ return -EACCES;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
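+ /*
+ * Stage the page through the preallocated sdma_access_bo bounce
+ * buffer: copy the caller's data in first for writes, then let SDMA
+ * move it between the bounce buffer and the BO (reads are copied out
+ * after the SDMA job completes).
+ */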
+ if (write)
+ memcpy(adev->mman.sdma_access_ptr, buf, len);
+
+ num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+ r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+ &job,
+ AMDGPU_KERNEL_JOB_ID_TTM_ACCESS_MEMORY_SDMA);
+ if (r)
+ goto out;
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
+ src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
+ src_mm.start;
+ dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
+ if (write)
+ swap(src_addr, dst_addr);
+
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+ PAGE_SIZE, 0);
+
+ amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+
+ fence = amdgpu_job_submit(job);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+ r = -ETIMEDOUT;
+ dma_fence_put(fence);
+
+ if (!(r || write))
+ memcpy(buf, adev->mman.sdma_access_ptr, len);
+out:
+ drm_dev_exit(idx);
+ return r;
+}
+
+/**
+ * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
+ *
+ * @bo: The buffer object to read/write
+ * @offset: Offset into buffer object
+ * @buf: Secondary buffer to write/read from
+ * @len: Length in bytes of access
+ * @write: true if writing
+ *
+ * This is used to access VRAM that backs a buffer object via MMIO
+ * access for debugging purposes.
+ */
+static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf, int len,
+ int write)
+{
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ struct amdgpu_res_cursor cursor;
+ int ret = 0;
+
+ if (bo->resource->mem_type != TTM_PL_VRAM)
+ return -EIO;
+
+ if (amdgpu_device_has_timeouts_enabled(adev) &&
+ !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
+ return len;
+
+ amdgpu_res_first(bo->resource, offset, len, &cursor);
+ while (cursor.remaining) {
+ size_t count, size = cursor.size;
+ loff_t pos = cursor.start;
+
+ count = amdgpu_device_aper_access(adev, pos, buf, size, write);
+ size -= count;
+ if (size) {
+ /* use MM access for the rest of VRAM and for unaligned addresses */
+ pos += count;
+ buf += count;
+ amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
+ }
+
+ ret += cursor.size;
+ buf += cursor.size;
+ amdgpu_res_next(&cursor, cursor.size);
+ }
+
+ return ret;
+}
+
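+/* BO deletion is reported to the driver as a move to a NULL resource. */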
+static void
+amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
+{
+ amdgpu_bo_move_notify(bo, false, NULL);
+}
+
+static struct ttm_device_funcs amdgpu_bo_driver = {
+ .ttm_tt_create = &amdgpu_ttm_tt_create,
+ .ttm_tt_populate = &amdgpu_ttm_tt_populate,
+ .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
+ .ttm_tt_destroy = &amdgpu_ttm_backend_destroy,
+ .eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
+ .evict_flags = &amdgpu_evict_flags,
+ .move = &amdgpu_bo_move,
+ .delete_mem_notify = &amdgpu_bo_delete_mem_notify,
+ .release_notify = &amdgpu_bo_release_notify,
+ .io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
+ .io_mem_pfn = amdgpu_ttm_io_mem_pfn,
+ .access_memory = &amdgpu_ttm_access_memory,
+};
+
+/*
+ * Firmware Reservation functions
+ */
+/**
+ * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free fw reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
+ NULL, &adev->mman.fw_vram_usage_va);
+}
+
+/*
+ * Driver Reservation functions
+ */
+/**
+ * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free drv reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
+ NULL,
+ &adev->mman.drv_vram_usage_va);
+}
+
+/**
+ * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from fw.
+ */
+static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
+{
+ uint64_t vram_size = adev->gmc.visible_vram_size;
+
+ adev->mman.fw_vram_usage_va = NULL;
+ adev->mman.fw_vram_usage_reserved_bo = NULL;
+
+ if (adev->mman.fw_vram_usage_size == 0 ||
+ adev->mman.fw_vram_usage_size > vram_size)
+ return 0;
+
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->mman.fw_vram_usage_start_offset,
+ adev->mman.fw_vram_usage_size,
+ &adev->mman.fw_vram_usage_reserved_bo,
+ &adev->mman.fw_vram_usage_va);
+}
+
+/**
+ * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from drv.
+ */
+static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
+{
+ u64 vram_size = adev->gmc.visible_vram_size;
+
+ adev->mman.drv_vram_usage_va = NULL;
+ adev->mman.drv_vram_usage_reserved_bo = NULL;
+
+ if (adev->mman.drv_vram_usage_size == 0 ||
+ adev->mman.drv_vram_usage_size > vram_size)
+ return 0;
+
+ return amdgpu_bo_create_kernel_at(adev,
+ adev->mman.drv_vram_usage_start_offset,
+ adev->mman.drv_vram_usage_size,
+ &adev->mman.drv_vram_usage_reserved_bo,
+ &adev->mman.drv_vram_usage_va);
+}
+
+/*
+ * Memory training reservation functions
+ */
+
+/**
+ * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free memory training reserved vram if it has been reserved.
+ */
+static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
+{
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+ ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
+ amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
+ ctx->c2p_bo = NULL;
+
+ return 0;
+}
+
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev,
+ uint32_t reserve_size)
+{
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ ctx->c2p_train_data_offset =
+ ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M);
+ ctx->p2c_train_data_offset =
+ (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+ ctx->train_data_size =
+ GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
+}
+
+/*
+ * reserve TMR memory at the top of VRAM which holds
+ * IP Discovery data and is protected by PSP.
+ */
+static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
+{
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+ bool mem_train_support = false;
+ uint32_t reserve_size = 0;
+ int ret;
+
+ if (adev->bios && !amdgpu_sriov_vf(adev)) {
+ if (amdgpu_atomfirmware_mem_training_supported(adev))
+ mem_train_support = true;
+ else
+ DRM_DEBUG("memory training does not support!\n");
+ }
+
+ /*
+ * Query the reserved TMR size through atom firmwareinfo for Sienna_Cichlid and onwards,
+ * covering all use cases (IP discovery, G6 memory training, profiling, diagnostic data, etc.).
+ *
+ * Otherwise, fall back to the legacy approach of checking and reserving TMR blocks for IP
+ * discovery data and G6 memory training data respectively.
+ */
+ if (adev->bios)
+ reserve_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+
+ if (!adev->bios &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)))
+ reserve_size = max(reserve_size, (uint32_t)280 << 20);
+ else if (!reserve_size)
+ reserve_size = DISCOVERY_TMR_OFFSET;
+
+ if (mem_train_support) {
+ /* reserve vram for mem train according to TMR location */
+ amdgpu_ttm_training_data_block_init(adev, reserve_size);
+ ret = amdgpu_bo_create_kernel_at(adev,
+ ctx->c2p_train_data_offset,
+ ctx->train_data_size,
+ &ctx->c2p_bo,
+ NULL);
+ if (ret) {
+ dev_err(adev->dev, "alloc c2p_bo failed(%d)!\n", ret);
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ return ret;
+ }
+ ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
+ }
+
+ ret = amdgpu_bo_create_kernel_at(
+ adev, adev->gmc.real_vram_size - reserve_size, reserve_size,
+ &adev->mman.fw_reserved_memory, NULL);
+ if (ret) {
+ dev_err(adev->dev, "alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
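+/*
+ * On app APUs with memory partitions, create one TTM pool per partition so
+ * that page allocations come from the partition's NUMA node.
+ */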
+static int amdgpu_ttm_pools_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions)
+ return 0;
+
+ adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions,
+ sizeof(*adev->mman.ttm_pools),
+ GFP_KERNEL);
+ if (!adev->mman.ttm_pools)
+ return -ENOMEM;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++) {
+ ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev,
+ adev->gmc.mem_partitions[i].numa.node,
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
+ }
+ return 0;
+}
+
+static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools)
+ return;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; i++)
+ ttm_pool_fini(&adev->mman.ttm_pools[i]);
+
+ kfree(adev->mman.ttm_pools);
+ adev->mman.ttm_pools = NULL;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_init - Allocate the singleton 4K MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Allocates a one-page (4K) GEM BO in AMDGPU_GEM_DOMAIN_MMIO_REMAP when the
+ * hardware exposes a remap base (adev->rmmio_remap.bus_addr) and the host
+ * PAGE_SIZE is <= AMDGPU_GPU_PAGE_SIZE (4K). The BO is created as a regular
+ * GEM object (amdgpu_bo_create).
+ *
+ * Return:
+ * * 0 on success or intentional skip (feature not present/unsupported)
+ * * negative errno on allocation failure
+ */
+static int amdgpu_ttm_mmio_remap_bo_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_bo_param bp;
+ int r;
+
+ /* Skip if HW doesn't expose remap, or if PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE (4K). */
+ if (!adev->rmmio_remap.bus_addr || PAGE_SIZE > AMDGPU_GPU_PAGE_SIZE)
+ return 0;
+
+ memset(&bp, 0, sizeof(bp));
+
+ /* Create exactly one GEM BO in the MMIO_REMAP domain. */
+ bp.type = ttm_bo_type_device; /* userspace-mappable GEM */
+ bp.size = AMDGPU_GPU_PAGE_SIZE; /* 4K */
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_MMIO_REMAP;
+ bp.flags = 0;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &adev->rmmio_remap.bo);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_mmio_remap_bo_fini - Free the singleton MMIO_REMAP BO
+ * @adev: amdgpu device
+ *
+ * Frees the kernel-owned MMIO_REMAP BO if it was allocated by
+ * amdgpu_ttm_mmio_remap_bo_init().
+ */
+static void amdgpu_ttm_mmio_remap_bo_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_unref(&adev->rmmio_remap.bo);
+ adev->rmmio_remap.bo = NULL;
+}
+
+/*
+ * amdgpu_ttm_init - Init the memory management (ttm) as well as various
+ * gtt/vram related fields.
+ *
+ * This initializes all of the memory space pools that the TTM layer
+ * will need such as the GTT space (system memory mapped to the device),
+ * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
+ * can be mapped per VMID.
+ */
+int amdgpu_ttm_init(struct amdgpu_device *adev)
+{
+ uint64_t gtt_size;
+ int r;
+
+ mutex_init(&adev->mman.gtt_window_lock);
+
+ dma_set_max_seg_size(adev->dev, UINT_MAX);
+ /* No other users of the address space, so set it to 0 */
+ r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
+ adev_to_drm(adev)->anon_inode->i_mapping,
+ adev_to_drm(adev)->vma_offset_manager,
+ (adev->need_swiotlb ?
+ TTM_ALLOCATION_POOL_USE_DMA_ALLOC : 0) |
+ (dma_addressing_limited(adev->dev) ?
+ TTM_ALLOCATION_POOL_USE_DMA32 : 0) |
+ TTM_ALLOCATION_POOL_BENEFICIAL_ORDER(get_order(SZ_2M)));
+ if (r) {
+ dev_err(adev->dev,
+ "failed initializing buffer object driver(%d).\n", r);
+ return r;
+ }
+
+ r = amdgpu_ttm_pools_init(adev);
+ if (r) {
+ dev_err(adev->dev, "failed to init ttm pools(%d).\n", r);
+ return r;
+ }
+ adev->mman.initialized = true;
+
+ if (!adev->gmc.is_app_apu) {
+ /* Initialize VRAM pool with all of VRAM divided into pages */
+ r = amdgpu_vram_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing VRAM heap.\n");
+ return r;
+ }
+ }
+
+ /* Change the size here instead of the init above so only lpfn is affected */
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
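+ /*
+ * Map the CPU-visible part of the VRAM BAR: use a cached mapping when
+ * VRAM is coherently connected to the CPU (XGMI), write-combined
+ * otherwise; app APUs have no real VRAM to map.
+ */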
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_X86
+ if (adev->gmc.xgmi.connected_to_cpu)
+ adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
+ adev->gmc.visible_vram_size);
+
+ else if (adev->gmc.is_app_apu)
+ DRM_DEBUG_DRIVER(
+ "No need to ioremap when real vram size is 0\n");
+ else
+#endif
+ adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+ adev->gmc.visible_vram_size);
+#endif
+
+ /*
+ * The reserved VRAM for firmware must be pinned to the specified
+ * place in VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_fw_reserve_vram_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * The reserved VRAM for the driver must be pinned to a specific
+ * location in VRAM, so reserve it early.
+ */
+ r = amdgpu_ttm_drv_reserve_vram_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * only NAVI10 and later ASICs support IP discovery.
+ * If IP discovery is enabled, a block of memory should be
+ * reserved for it.
+ */
+ if (adev->discovery.reserve_tmr) {
+ r = amdgpu_ttm_reserve_tmr(adev);
+ if (r)
+ return r;
+ }
+
+ /* allocate memory as required for VGA
+ * This is used for VGA emulation and pre-OS scanout buffers to
+ * avoid display artifacts while transitioning between pre-OS
+ * and driver.
+ */
+ if (!adev->gmc.is_app_apu) {
+ r = amdgpu_bo_create_kernel_at(adev, 0,
+ adev->mman.stolen_vga_size,
+ &adev->mman.stolen_vga_memory,
+ NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+ adev->mman.stolen_extended_size,
+ &adev->mman.stolen_extended_memory,
+ NULL);
+
+ if (r)
+ return r;
+
+ r = amdgpu_bo_create_kernel_at(adev,
+ adev->mman.stolen_reserved_offset,
+ adev->mman.stolen_reserved_size,
+ &adev->mman.stolen_reserved_memory,
+ NULL);
+ if (r)
+ return r;
+ } else {
+ DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
+ }
+
+ dev_info(adev->dev, "amdgpu: %uM of VRAM memory ready\n",
+ (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024)));
+
+ /* Compute GTT size, either based on TTM limit
+ * or whatever the user passed on module init.
+ */
+ gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+ if (amdgpu_gtt_size != -1) {
+ uint64_t configured_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ drm_warn(&adev->ddev,
+ "Configuring gttsize via module parameter is deprecated, please use ttm.pages_limit\n");
+ if (gtt_size != configured_size)
+ drm_warn(&adev->ddev,
+ "GTT size has been set as %llu but TTM size has been set as %llu, this is unusual\n",
+ configured_size, gtt_size);
+
+ gtt_size = configured_size;
+ }
+
+ /* Initialize GTT memory pool */
+ r = amdgpu_gtt_mgr_init(adev, gtt_size);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing GTT heap.\n");
+ return r;
+ }
+ dev_info(adev->dev, "amdgpu: %uM of GTT memory ready.\n",
+ (unsigned int)(gtt_size / (1024 * 1024)));
+
+ if (adev->flags & AMD_IS_APU) {
+ if (adev->gmc.real_vram_size < gtt_size)
+ adev->apu_prefer_gtt = true;
+ }
+
+ /* Initialize doorbell pool on PCI BAR */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing doorbell heap.\n");
+ return r;
+ }
+
+ /* Create a doorbell page for kernel use */
+ r = amdgpu_doorbell_create_kernel_doorbells(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize kernel doorbells.\n");
+ return r;
+ }
+
+ /* Initialize MMIO-remap pool (a single 4K page) */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_MMIO_REMAP, 1);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing MMIO-remap heap.\n");
+ return r;
+ }
+
+ /* Allocate the singleton MMIO_REMAP BO (4K) if supported */
+ r = amdgpu_ttm_mmio_remap_bo_init(adev);
+ if (r)
+ return r;
+
+ /* Initialize preemptible memory pool */
+ r = amdgpu_preempt_mgr_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing PREEMPT heap.\n");
+ return r;
+ }
+
+ /* Initialize various on-chip memory pools */
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing GDS heap.\n");
+ return r;
+ }
+
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing gws heap.\n");
+ return r;
+ }
+
+ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
+ if (r) {
+ dev_err(adev->dev, "Failed initializing oa heap.\n");
+ return r;
+ }
+ if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mman.sdma_access_bo, NULL,
+ &adev->mman.sdma_access_ptr))
+ DRM_WARN("Debug VRAM access will use slowpath MM access\n");
+
+ return 0;
+}
+
+/*
+ * amdgpu_ttm_fini - De-initialize the TTM memory pools
+ */
+void amdgpu_ttm_fini(struct amdgpu_device *adev)
+{
+ int idx;
+
+ if (!adev->mman.initialized)
+ return;
+
+ amdgpu_ttm_pools_fini(adev);
+
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ /* return the stolen vga memory back to VRAM */
+ if (!adev->gmc.is_app_apu) {
+ amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+ /* return the FW reserved memory back to VRAM */
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL,
+ NULL);
+ amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory_extend, NULL,
+ NULL);
+ if (adev->mman.stolen_reserved_size)
+ amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
+ NULL, NULL);
+ }
+ amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
+ &adev->mman.sdma_access_ptr);
+
+ amdgpu_ttm_mmio_remap_bo_fini(adev);
+ amdgpu_ttm_fw_reserve_vram_fini(adev);
+ amdgpu_ttm_drv_reserve_vram_fini(adev);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
+
+ drm_dev_exit(idx);
+ }
+
+ if (!adev->gmc.is_app_apu)
+ amdgpu_vram_mgr_fini(adev);
+ amdgpu_gtt_mgr_fini(adev);
+ amdgpu_preempt_mgr_fini(adev);
+ amdgpu_doorbell_fini(adev);
+
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_MMIO_REMAP);
+ ttm_device_fini(&adev->mman.bdev);
+ adev->mman.initialized = false;
+ dev_info(adev->dev, "amdgpu: ttm finalized\n");
+}
+
+/**
+ * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true when we can use buffer functions.
+ *
+ * Enable/disable use of buffer functions during suspend/resume. This should
+ * only be called at bootup or when userspace isn't running.
+ */
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
+{
+ struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ uint64_t size;
+ int r;
+
+ if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
+ adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
+ return;
+
+ if (enable) {
+ struct amdgpu_ring *ring;
+ struct drm_gpu_scheduler *sched;
+
+ ring = adev->mman.buffer_funcs_ring;
+ sched = &ring->sched;
+ r = drm_sched_entity_init(&adev->mman.high_pr,
+ DRM_SCHED_PRIORITY_KERNEL, &sched,
+ 1, NULL);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
+ return;
+ }
+
+ r = drm_sched_entity_init(&adev->mman.low_pr,
+ DRM_SCHED_PRIORITY_NORMAL, &sched,
+ 1, NULL);
+ if (r) {
+ dev_err(adev->dev,
+ "Failed setting up TTM BO move entity (%d)\n",
+ r);
+ goto error_free_entity;
+ }
+ } else {
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+ drm_sched_entity_destroy(&adev->mman.low_pr);
+ /* Drop all the old fences since re-creating the scheduler entities
+ * will allocate new contexts.
+ */
+ ttm_resource_manager_cleanup(man);
+ }
+
+ /* This just adjusts TTM's idea of the VRAM size, which sets lpfn to the correct value */
+ if (enable)
+ size = adev->gmc.real_vram_size;
+ else
+ size = adev->gmc.visible_vram_size;
+ man->size = size;
+ adev->mman.buffer_funcs_enabled = enable;
+
+ return;
+
+error_free_entity:
+ drm_sched_entity_destroy(&adev->mman.high_pr);
+}
+
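+/*
+ * amdgpu_ttm_prepare_job - allocate a job with an IB for a buffer operation
+ *
+ * Allocates a job with an indirect buffer of @num_dw dwords from the direct
+ * or delayed IB pool and binds it to the high or low priority scheduler
+ * entity depending on @delayed. If @vm_needs_flush is set, the job is marked
+ * to flush the VM using the PDB0 or GART page directory address. Fences of
+ * @resv, if any, are added as dependencies of the job.
+ */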
+static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+ bool direct_submit,
+ unsigned int num_dw,
+ struct dma_resv *resv,
+ bool vm_needs_flush,
+ struct amdgpu_job **job,
+ bool delayed, u64 k_job_id)
+{
+ enum amdgpu_ib_pool_type pool = direct_submit ?
+ AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED;
+ int r;
+ struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
+ &adev->mman.high_pr;
+ r = amdgpu_job_alloc_with_ib(adev, entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ num_dw * 4, pool, job, k_job_id);
+ if (r)
+ return r;
+
+ if (vm_needs_flush) {
+ (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
+ adev->gmc.pdb0_bo :
+ adev->gart.bo);
+ (*job)->vm_needs_flush = true;
+ }
+ if (!resv)
+ return 0;
+
+ return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
+ DMA_RESV_USAGE_BOOKKEEP);
+}
+
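+/*
+ * amdgpu_copy_buffer - schedule a copy on the buffer functions ring
+ *
+ * Splits the copy of @byte_count bytes from @src_offset to @dst_offset into
+ * chunks of at most copy_max_bytes, emits them into a single IB and either
+ * submits the job directly to @ring or hands it to the scheduler. The fence
+ * of the submission is returned in @fence.
+ */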
+int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+ uint64_t dst_offset, uint32_t byte_count,
+ struct dma_resv *resv,
+ struct dma_fence **fence, bool direct_submit,
+ bool vm_needs_flush, uint32_t copy_flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int num_loops, num_dw;
+ struct amdgpu_job *job;
+ uint32_t max_bytes;
+ unsigned int i;
+ int r;
+
+ if (!direct_submit && !ring->sched.ready) {
+ dev_err(adev->dev,
+ "Trying to move memory with ring turned off.\n");
+ return -EINVAL;
+ }
+
+ max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
+ num_loops = DIV_ROUND_UP(byte_count, max_bytes);
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
+ r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
+ resv, vm_needs_flush, &job, false,
+ AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER);
+ if (r)
+ return r;
+
+ for (i = 0; i < num_loops; i++) {
+ uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
+
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
+ dst_offset, cur_size_in_bytes, copy_flags);
+ src_offset += cur_size_in_bytes;
+ dst_offset += cur_size_in_bytes;
+ byte_count -= cur_size_in_bytes;
+ }
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+ if (direct_submit)
+ r = amdgpu_job_submit_direct(job, ring, fence);
+ else
+ *fence = amdgpu_job_submit(job);
+ if (r)
+ goto error_free;
+
+ return r;
+
+error_free:
+ amdgpu_job_free(job);
+ dev_err(adev->dev, "Error scheduling IBs (%d)\n", r);
+ return r;
+}
+
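+/*
+ * amdgpu_ttm_fill_mem - fill @byte_count bytes at @dst_addr with @src_data
+ *
+ * Splits the fill into chunks of at most fill_max_bytes, emits them into a
+ * single IB and submits the job through the scheduler. The fence of the
+ * submission is returned in @fence.
+ */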
+static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
+ uint64_t dst_addr, uint32_t byte_count,
+ struct dma_resv *resv,
+ struct dma_fence **fence,
+ bool vm_needs_flush, bool delayed,
+ u64 k_job_id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int num_loops, num_dw;
+ struct amdgpu_job *job;
+ uint32_t max_bytes;
+ unsigned int i;
+ int r;
+
+ max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+ num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
+ num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
+ r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
+ &job, delayed, k_job_id);
+ if (r)
+ return r;
+
+ for (i = 0; i < num_loops; i++) {
+ uint32_t cur_size = min(byte_count, max_bytes);
+
+ amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
+ cur_size);
+
+ dst_addr += cur_size;
+ byte_count -= cur_size;
+ }
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+ *fence = amdgpu_job_submit(job);
+ return 0;
+}
+
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the memory buffer resource.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r = 0;
+
+ if (!adev->mman.buffer_funcs_enabled)
+ return -EINVAL;
+
+ if (!fence)
+ return -EINVAL;
+
+ *fence = dma_fence_get_stub();
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (cursor.remaining) {
+ struct dma_fence *next = NULL;
+ u64 size;
+
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
+ }
+
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
+
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true,
+ AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
+ }
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
+
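+/*
+ * amdgpu_fill_buffer - fill a whole buffer object with @src_data
+ *
+ * Walks the BO's backing resource with a cursor, maps each chunk through a
+ * GTT transfer window where needed and never fills more than 256MiB per
+ * submission to avoid timeouts. The fence of the last submission is
+ * returned in @f.
+ */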
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **f,
+ bool delayed,
+ u64 k_job_id)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_res_cursor dst;
+ int r;
+
+ if (!adev->mman.buffer_funcs_enabled) {
+ dev_err(adev->dev,
+ "Trying to clear memory with ring turned off.\n");
+ return -EINVAL;
+ }
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (dst.remaining) {
+ struct dma_fence *next;
+ uint64_t cur_size, to;
+
+ /* Never fill more than 256MiB at once to avoid timeouts */
+ cur_size = min(dst.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst,
+ 1, ring, false, &cur_size, &to);
+ if (r)
+ goto error;
+
+ r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
+ &next, true, delayed, k_job_id);
+ if (r)
+ goto error;
+
+ dma_fence_put(fence);
+ fence = next;
+
+ amdgpu_res_next(&dst, cur_size);
+ }
+error:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ if (f)
+ *f = dma_fence_get(fence);
+ dma_fence_put(fence);
+ return r;
+}
+
+/**
+ * amdgpu_ttm_evict_resources - evict memory buffers
+ * @adev: amdgpu device object
+ * @mem_type: evicted BO's memory type
+ *
+ * Evicts all @mem_type buffers on the lru list of the memory type.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
+{
+ struct ttm_resource_manager *man;
+
+ switch (mem_type) {
+ case TTM_PL_VRAM:
+ case TTM_PL_TT:
+ case AMDGPU_PL_GWS:
+ case AMDGPU_PL_GDS:
+ case AMDGPU_PL_OA:
+ man = ttm_manager_type(&adev->mman.bdev, mem_type);
+ break;
+ default:
+ dev_err(adev->dev, "Trying to evict invalid memory type\n");
+ return -EINVAL;
+ }
+
+ return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
+{
+ struct amdgpu_device *adev = m->private;
+
+ return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
+}
+
+DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);
+
+/*
+ * amdgpu_ttm_vram_read - Linear read access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
+static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (*pos >= adev->gmc.mc_vram_size)
+ return -ENXIO;
+
+ size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
+ while (size) {
+ size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
+ uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
+
+ amdgpu_device_vram_access(adev, *pos, value, bytes, false);
+ if (copy_to_user(buf, value, bytes))
+ return -EFAULT;
+
+ result += bytes;
+ buf += bytes;
+ *pos += bytes;
+ size -= bytes;
+ }
+
+ return result;
+}
+
+/*
+ * amdgpu_ttm_vram_write - Linear write access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
+static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ if (*pos >= adev->gmc.mc_vram_size)
+ return -ENXIO;
+
+ while (size) {
+ uint32_t value;
+
+ if (*pos >= adev->gmc.mc_vram_size)
+ return result;
+
+ r = get_user(value, (uint32_t *)buf);
+ if (r)
+ return r;
+
+ amdgpu_device_mm_access(adev, *pos, &value, 4, true);
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ return result;
+}
+
+static const struct file_operations amdgpu_ttm_vram_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_ttm_vram_read,
+ .write = amdgpu_ttm_vram_write,
+ .llseek = default_llseek,
+};
+
+/*
+ * amdgpu_iomem_read - Virtual read access to GPU mapped memory
+ *
+ * This function is used to read memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
+static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ struct iommu_domain *dom;
+ ssize_t result = 0;
+ int r;
+
+ /* retrieve the IOMMU domain if any for this device */
+ dom = iommu_get_domain_for_dev(adev->dev);
+
+ while (size) {
+ phys_addr_t addr = *pos & PAGE_MASK;
+ loff_t off = *pos & ~PAGE_MASK;
+ size_t bytes = PAGE_SIZE - off;
+ unsigned long pfn;
+ struct page *p;
+ void *ptr;
+
+ bytes = min(bytes, size);
+
+ /* Translate the bus address to a physical address. If
+ * the domain is NULL, it means there is no IOMMU active
+ * and the address translation is the identity mapping.
+ */
+ addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
+
+ pfn = addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return -EPERM;
+
+ p = pfn_to_page(pfn);
+ if (p->mapping != adev->mman.bdev.dev_mapping)
+ return -EPERM;
+
+ ptr = kmap_local_page(p);
+ r = copy_to_user(buf, ptr + off, bytes);
+ kunmap_local(ptr);
+ if (r)
+ return -EFAULT;
+
+ size -= bytes;
+ *pos += bytes;
+ result += bytes;
+ }
+
+ return result;
+}
+
+/*
+ * amdgpu_iomem_write - Virtual write access to GPU mapped memory
+ *
+ * This function is used to write memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
+static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ struct iommu_domain *dom;
+ ssize_t result = 0;
+ int r;
+
+ dom = iommu_get_domain_for_dev(adev->dev);
+
+ while (size) {
+ phys_addr_t addr = *pos & PAGE_MASK;
+ loff_t off = *pos & ~PAGE_MASK;
+ size_t bytes = PAGE_SIZE - off;
+ unsigned long pfn;
+ struct page *p;
+ void *ptr;
+
+ bytes = min(bytes, size);
+
+ addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
+
+ pfn = addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return -EPERM;
+
+ p = pfn_to_page(pfn);
+ if (p->mapping != adev->mman.bdev.dev_mapping)
+ return -EPERM;
+
+ ptr = kmap_local_page(p);
+ r = copy_from_user(ptr + off, buf, bytes);
+ kunmap_local(ptr);
+ if (r)
+ return -EFAULT;
+
+ size -= bytes;
+ *pos += bytes;
+ result += bytes;
+ }
+
+ return result;
+}
+
+static const struct file_operations amdgpu_ttm_iomem_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_iomem_read,
+ .write = amdgpu_iomem_write,
+ .llseek = default_llseek
+};
+
+#endif
+
+void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+
+ debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
+ &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
+ debugfs_create_file("amdgpu_iomem", 0444, root, adev,
+ &amdgpu_ttm_iomem_fops);
+ debugfs_create_file("ttm_page_pool", 0444, root, adev,
+ &amdgpu_ttm_page_pool_fops);
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_VRAM),
+ root, "amdgpu_vram_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_TT),
+ root, "amdgpu_gtt_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GDS),
+ root, "amdgpu_gds_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GWS),
+ root, "amdgpu_gws_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_OA),
+ root, "amdgpu_oa_mm");
+
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
new file mode 100644
index 000000000000..577ee04ce0bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_TTM_H__
+#define __AMDGPU_TTM_H__
+
+#include <linux/dma-direction.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/ttm/ttm_placement.h>
+#include "amdgpu_vram_mgr.h"
+#include "amdgpu_hmm.h"
+
+#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
+#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
+#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
+#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
+#define AMDGPU_PL_MMIO_REMAP (TTM_PL_PRIV + 5)
+#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
+
+#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
+#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
+
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+
+struct hmm_range;
+
+struct amdgpu_gtt_mgr {
+ struct ttm_resource_manager manager;
+ struct drm_mm mm;
+ spinlock_t lock;
+};
+
+struct amdgpu_mman {
+ struct ttm_device bdev;
+ struct ttm_pool *ttm_pools;
+ bool initialized;
+ void __iomem *aper_base_kaddr;
+
+ /* buffer handling */
+ const struct amdgpu_buffer_funcs *buffer_funcs;
+ struct amdgpu_ring *buffer_funcs_ring;
+ bool buffer_funcs_enabled;
+
+ struct mutex gtt_window_lock;
+ /* High priority scheduler entity for buffer moves */
+ struct drm_sched_entity high_pr;
+ /* Low priority scheduler entity for VRAM clearing */
+ struct drm_sched_entity low_pr;
+
+ struct amdgpu_vram_mgr vram_mgr;
+ struct amdgpu_gtt_mgr gtt_mgr;
+ struct ttm_resource_manager preempt_mgr;
+
+ uint64_t stolen_vga_size;
+ struct amdgpu_bo *stolen_vga_memory;
+ uint64_t stolen_extended_size;
+ struct amdgpu_bo *stolen_extended_memory;
+ bool keep_stolen_vga_memory;
+
+ struct amdgpu_bo *stolen_reserved_memory;
+ uint64_t stolen_reserved_offset;
+ uint64_t stolen_reserved_size;
+
+ /* fw reserved memory */
+ struct amdgpu_bo *fw_reserved_memory;
+ struct amdgpu_bo *fw_reserved_memory_extend;
+
+ /* firmware VRAM reservation */
+ u64 fw_vram_usage_start_offset;
+ u64 fw_vram_usage_size;
+ struct amdgpu_bo *fw_vram_usage_reserved_bo;
+ void *fw_vram_usage_va;
+
+ /* driver VRAM reservation */
+ u64 drv_vram_usage_start_offset;
+ u64 drv_vram_usage_size;
+ struct amdgpu_bo *drv_vram_usage_reserved_bo;
+ void *drv_vram_usage_va;
+
+ /* PAGE_SIZE'd BO for process memory r/w over SDMA. */
+ struct amdgpu_bo *sdma_access_bo;
+ void *sdma_access_ptr;
+};
+
+struct amdgpu_copy_mem {
+ struct ttm_buffer_object *bo;
+ struct ttm_resource *mem;
+ unsigned long offset;
+};
+
+#define AMDGPU_COPY_FLAGS_TMZ (1 << 0)
+#define AMDGPU_COPY_FLAGS_READ_DECOMPRESSED (1 << 1)
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESSED (1 << 2)
+#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_SHIFT 3
+#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_MASK 0x03
+#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_SHIFT 5
+#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07
+#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8
+#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1
+
+#define AMDGPU_COPY_FLAGS_SET(field, value) \
+ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT)
+#define AMDGPU_COPY_FLAGS_GET(value, field) \
+ (((__u32)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & AMDGPU_COPY_FLAGS_##field##_MASK)
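+
+/*
+ * Resulting copy_flags layout (from the shift/mask values above):
+ *   bit  0     TMZ
+ *   bit  1     READ_DECOMPRESSED
+ *   bit  2     WRITE_COMPRESSED
+ *   bits 3-4   MAX_COMPRESSED
+ *   bits 5-7   NUMBER_TYPE
+ *   bits 8-13  DATA_FORMAT
+ *   bit  14    WRITE_COMPRESS_DISABLE
+ */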
+
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
+void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
+int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
+void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
+int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
+void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
+
+bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
+void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr);
+
+uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
+
+u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
+int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
+ struct ttm_resource *mem,
+ u64 offset, u64 size,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt);
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table *sgt);
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr);
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
+ uint64_t start, uint64_t size);
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
+ uint64_t start);
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev);
+
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res);
+
+int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_fini(struct amdgpu_device *adev);
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
+ bool enable);
+int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+ uint64_t dst_offset, uint32_t byte_count,
+ struct dma_resv *resv,
+ struct dma_fence **fence, bool direct_submit,
+ bool vm_needs_flush, uint32_t copy_flags);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence);
+int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+ uint32_t src_data,
+ struct dma_resv *resv,
+ struct dma_fence **fence,
+ bool delayed,
+ u64 k_job_id);
+
+int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
+void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
+uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct amdgpu_hmm_range *range);
+#else
+static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct amdgpu_hmm_range *range)
+{
+ return -EPERM;
+}
+#endif
+
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr);
+int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
+ uint64_t addr, uint32_t flags);
+bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
+ unsigned long end, unsigned long *userptr);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+ int *last_invalidated);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem);
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+ struct ttm_resource *mem);
+int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
+
+void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
new file mode 100644
index 000000000000..e96f24e9ad57
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -0,0 +1,1516 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+
+#define AMDGPU_UCODE_NAME_MAX (128)
+
+static const struct kicker_device kicker_device_list[] = {
+ {0x744B, 0x00},
+ {0x7551, 0xC8}
+};
+
+static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr)
+{
+ DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
+ DRM_DEBUG("header_size_bytes: %u\n", le32_to_cpu(hdr->header_size_bytes));
+ DRM_DEBUG("header_version_major: %u\n", le16_to_cpu(hdr->header_version_major));
+ DRM_DEBUG("header_version_minor: %u\n", le16_to_cpu(hdr->header_version_minor));
+ DRM_DEBUG("ip_version_major: %u\n", le16_to_cpu(hdr->ip_version_major));
+ DRM_DEBUG("ip_version_minor: %u\n", le16_to_cpu(hdr->ip_version_minor));
+ DRM_DEBUG("ucode_version: 0x%08x\n", le32_to_cpu(hdr->ucode_version));
+ DRM_DEBUG("ucode_size_bytes: %u\n", le32_to_cpu(hdr->ucode_size_bytes));
+ DRM_DEBUG("ucode_array_offset_bytes: %u\n",
+ le32_to_cpu(hdr->ucode_array_offset_bytes));
+ DRM_DEBUG("crc32: 0x%08x\n", le32_to_cpu(hdr->crc32));
+}
+
+void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("MC\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct mc_firmware_header_v1_0 *mc_hdr =
+ container_of(hdr, struct mc_firmware_header_v1_0, header);
+
+ DRM_DEBUG("io_debug_size_bytes: %u\n",
+ le32_to_cpu(mc_hdr->io_debug_size_bytes));
+ DRM_DEBUG("io_debug_array_offset_bytes: %u\n",
+ le32_to_cpu(mc_hdr->io_debug_array_offset_bytes));
+ } else {
+ DRM_ERROR("Unknown MC ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+ const struct smc_firmware_header_v1_0 *v1_0_hdr;
+ const struct smc_firmware_header_v2_0 *v2_0_hdr;
+ const struct smc_firmware_header_v2_1 *v2_1_hdr;
+
+ DRM_DEBUG("SMC\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ v1_0_hdr = container_of(hdr, struct smc_firmware_header_v1_0, header);
+ DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(v1_0_hdr->ucode_start_addr));
+ } else if (version_major == 2) {
+ switch (version_minor) {
+ case 0:
+ v2_0_hdr = container_of(hdr, struct smc_firmware_header_v2_0, v1_0.header);
+ DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_offset_bytes));
+ DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_size_bytes));
+ break;
+ case 1:
+ v2_1_hdr = container_of(hdr, struct smc_firmware_header_v2_1, v1_0.header);
+ DRM_DEBUG("pptable_count: %u\n", le32_to_cpu(v2_1_hdr->pptable_count));
+ DRM_DEBUG("pptable_entry_offset: %u\n", le32_to_cpu(v2_1_hdr->pptable_entry_offset));
+ break;
+ default:
+ break;
+ }
+
+ } else {
+ DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("GFX\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct gfx_firmware_header_v1_0 *gfx_hdr =
+ container_of(hdr, struct gfx_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(gfx_hdr->ucode_feature_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(gfx_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(gfx_hdr->jt_size));
+ } else if (version_major == 2) {
+ const struct gfx_firmware_header_v2_0 *gfx_hdr =
+ container_of(hdr, struct gfx_firmware_header_v2_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(gfx_hdr->ucode_feature_version));
+ } else {
+ DRM_ERROR("Unknown GFX ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("RLC\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct rlc_firmware_header_v1_0 *rlc_hdr =
+ container_of(hdr, struct rlc_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr->ucode_feature_version));
+ DRM_DEBUG("save_and_restore_offset: %u\n",
+ le32_to_cpu(rlc_hdr->save_and_restore_offset));
+ DRM_DEBUG("clear_state_descriptor_offset: %u\n",
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
+ DRM_DEBUG("avail_scratch_ram_locations: %u\n",
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
+ DRM_DEBUG("master_pkt_description_offset: %u\n",
+ le32_to_cpu(rlc_hdr->master_pkt_description_offset));
+ } else if (version_major == 2) {
+ const struct rlc_firmware_header_v2_0 *rlc_hdr =
+ container_of(hdr, struct rlc_firmware_header_v2_0, header);
+ const struct rlc_firmware_header_v2_1 *rlc_hdr_v2_1 =
+ container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+ const struct rlc_firmware_header_v2_2 *rlc_hdr_v2_2 =
+ container_of(rlc_hdr_v2_1, struct rlc_firmware_header_v2_2, v2_1);
+ const struct rlc_firmware_header_v2_3 *rlc_hdr_v2_3 =
+ container_of(rlc_hdr_v2_2, struct rlc_firmware_header_v2_3, v2_2);
+ const struct rlc_firmware_header_v2_4 *rlc_hdr_v2_4 =
+ container_of(rlc_hdr_v2_3, struct rlc_firmware_header_v2_4, v2_3);
+
+ switch (version_minor) {
+ case 0:
+ /* rlc_hdr v2_0 */
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr->ucode_feature_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(rlc_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(rlc_hdr->jt_size));
+ DRM_DEBUG("save_and_restore_offset: %u\n",
+ le32_to_cpu(rlc_hdr->save_and_restore_offset));
+ DRM_DEBUG("clear_state_descriptor_offset: %u\n",
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset));
+ DRM_DEBUG("avail_scratch_ram_locations: %u\n",
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations));
+ DRM_DEBUG("reg_restore_list_size: %u\n",
+ le32_to_cpu(rlc_hdr->reg_restore_list_size));
+ DRM_DEBUG("reg_list_format_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_start));
+ DRM_DEBUG("reg_list_format_separate_start: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start));
+ DRM_DEBUG("starting_offsets_start: %u\n",
+ le32_to_cpu(rlc_hdr->starting_offsets_start));
+ DRM_DEBUG("reg_list_format_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes));
+ DRM_DEBUG("reg_list_format_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ DRM_DEBUG("reg_list_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes));
+ DRM_DEBUG("reg_list_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ DRM_DEBUG("reg_list_format_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_size_bytes));
+ DRM_DEBUG("reg_list_format_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
+ DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+ DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+ break;
+ case 1:
+ /* rlc_hdr v2_1 */
+ DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->reg_list_format_direct_reg_list_length));
+ DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_ucode_ver));
+ DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_feature_ver));
+ DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_size_bytes));
+ DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_cntl_offset_bytes));
+ DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_ucode_ver));
+ DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_feature_ver));
+ DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_size_bytes));
+ DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_gpm_offset_bytes));
+ DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_ucode_ver));
+ DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_feature_ver));
+ DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_size_bytes));
+ DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_1->save_restore_list_srm_offset_bytes));
+ break;
+ case 2:
+ /* rlc_hdr v2_2 */
+ DRM_DEBUG("rlc_iram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_size_bytes));
+ DRM_DEBUG("rlc_iram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_iram_ucode_offset_bytes));
+ DRM_DEBUG("rlc_dram_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_size_bytes));
+ DRM_DEBUG("rlc_dram_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_2->rlc_dram_ucode_offset_bytes));
+ break;
+ case 3:
+ /* rlc_hdr v2_3 */
+ DRM_DEBUG("rlcp_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_version));
+ DRM_DEBUG("rlcp_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_feature_version));
+ DRM_DEBUG("rlcp_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_size_bytes));
+ DRM_DEBUG("rlcp_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcp_ucode_offset_bytes));
+ DRM_DEBUG("rlcv_ucode_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_version));
+ DRM_DEBUG("rlcv_ucode_feature_version: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_feature_version));
+ DRM_DEBUG("rlcv_ucode_size_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_size_bytes));
+ DRM_DEBUG("rlcv_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_3->rlcv_ucode_offset_bytes));
+ break;
+ case 4:
+ /* rlc_hdr v2_4 */
+ DRM_DEBUG("global_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("global_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->global_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se0_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se0_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se1_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se1_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se2_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se2_tap_delays_ucode_offset_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_size_bytes :%u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_size_bytes));
+ DRM_DEBUG("se3_tap_delays_ucode_offset_bytes: %u\n",
+ le32_to_cpu(rlc_hdr_v2_4->se3_tap_delays_ucode_offset_bytes));
+ break;
+ default:
+ DRM_ERROR("Unknown RLC v2 ucode: v2.%u\n", version_minor);
+ break;
+ }
+ } else {
+ DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("SDMA\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct sdma_firmware_header_v1_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
+ DRM_DEBUG("ucode_change_version: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_change_version));
+ DRM_DEBUG("jt_offset: %u\n", le32_to_cpu(sdma_hdr->jt_offset));
+ DRM_DEBUG("jt_size: %u\n", le32_to_cpu(sdma_hdr->jt_size));
+ if (version_minor >= 1) {
+ const struct sdma_firmware_header_v1_1 *sdma_v1_1_hdr =
+ container_of(sdma_hdr, struct sdma_firmware_header_v1_1, v1_0);
+ DRM_DEBUG("digest_size: %u\n", le32_to_cpu(sdma_v1_1_hdr->digest_size));
+ }
+ } else if (version_major == 2) {
+ const struct sdma_firmware_header_v2_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v2_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
+ DRM_DEBUG("ctx_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_offset));
+ DRM_DEBUG("ctx_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctx_jt_size));
+ DRM_DEBUG("ctl_ucode_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_ucode_offset));
+ DRM_DEBUG("ctl_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_offset));
+ DRM_DEBUG("ctl_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_size));
+ } else if (version_major == 3) {
+ const struct sdma_firmware_header_v3_0 *sdma_hdr =
+ container_of(hdr, struct sdma_firmware_header_v3_0, header);
+
+ DRM_DEBUG("ucode_reversion: %u\n",
+ le32_to_cpu(sdma_hdr->ucode_feature_version));
+ } else {
+ DRM_ERROR("Unknown SDMA ucode version: %u.%u\n",
+ version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+ uint32_t fw_index;
+ const struct psp_fw_bin_desc *desc;
+
+ DRM_DEBUG("PSP\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct psp_firmware_header_v1_0 *psp_hdr =
+ container_of(hdr, struct psp_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(psp_hdr->sos.fw_version));
+ DRM_DEBUG("sos_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr->sos.offset_bytes));
+ DRM_DEBUG("sos_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr->sos.size_bytes));
+ if (version_minor == 1) {
+ const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
+ DRM_DEBUG("toc_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc.fw_version));
+ DRM_DEBUG("toc_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc.offset_bytes));
+ DRM_DEBUG("toc_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc.size_bytes));
+ DRM_DEBUG("kdb_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->kdb.fw_version));
+ DRM_DEBUG("kdb_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->kdb.offset_bytes));
+ DRM_DEBUG("kdb_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->kdb.size_bytes));
+ }
+ if (version_minor == 2) {
+ const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0);
+ DRM_DEBUG("kdb_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb.fw_version));
+ DRM_DEBUG("kdb_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb.offset_bytes));
+ DRM_DEBUG("kdb_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_2->kdb.size_bytes));
+ }
+ if (version_minor == 3) {
+ const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
+ const struct psp_firmware_header_v1_3 *psp_hdr_v1_3 =
+ container_of(psp_hdr_v1_1, struct psp_firmware_header_v1_3, v1_1);
+ DRM_DEBUG("toc_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.fw_version));
+ DRM_DEBUG("toc_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.offset_bytes));
+ DRM_DEBUG("toc_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.toc.size_bytes));
+ DRM_DEBUG("kdb_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.fw_version));
+ DRM_DEBUG("kdb_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.offset_bytes));
+ DRM_DEBUG("kdb_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->v1_1.kdb.size_bytes));
+ DRM_DEBUG("spl_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl.fw_version));
+ DRM_DEBUG("spl_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl.offset_bytes));
+ DRM_DEBUG("spl_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_3->spl.size_bytes));
+ }
+ } else if (version_major == 2) {
+ const struct psp_firmware_header_v2_0 *psp_hdr_v2_0 =
+ container_of(hdr, struct psp_firmware_header_v2_0, header);
+ for (fw_index = 0; fw_index < le32_to_cpu(psp_hdr_v2_0->psp_fw_bin_count); fw_index++) {
+ desc = &(psp_hdr_v2_0->psp_fw_bin[fw_index]);
+ switch (desc->fw_type) {
+ case PSP_FW_TYPE_PSP_SOS:
+ DRM_DEBUG("psp_sos_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_sos_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SYS_DRV:
+ DRM_DEBUG("psp_sys_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_sys_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_KDB:
+ DRM_DEBUG("psp_kdb_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_kdb_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_TOC:
+ DRM_DEBUG("psp_toc_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_toc_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SPL:
+ DRM_DEBUG("psp_spl_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_spl_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_RL:
+ DRM_DEBUG("psp_rl_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_rl_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_SOC_DRV:
+ DRM_DEBUG("psp_soc_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_soc_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_INTF_DRV:
+ DRM_DEBUG("psp_intf_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_intf_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_DBG_DRV:
+ DRM_DEBUG("psp_dbg_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_dbg_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ case PSP_FW_TYPE_PSP_RAS_DRV:
+ DRM_DEBUG("psp_ras_drv_version: %u\n",
+ le32_to_cpu(desc->fw_version));
+ DRM_DEBUG("psp_ras_drv_size_bytes: %u\n",
+ le32_to_cpu(desc->size_bytes));
+ break;
+ default:
+ DRM_DEBUG("Unsupported PSP fw type: %d\n", desc->fw_type);
+ break;
+ }
+ }
+ } else {
+ DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
+ version_major, version_minor);
+ }
+}
+
+void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("GPU_INFO\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr =
+ container_of(hdr, struct gpu_info_firmware_header_v1_0, header);
+
+ DRM_DEBUG("version_major: %u\n",
+ le16_to_cpu(gpu_info_hdr->version_major));
+ DRM_DEBUG("version_minor: %u\n",
+ le16_to_cpu(gpu_info_hdr->version_minor));
+ } else {
+ DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor);
+ }
+}
+
+static int amdgpu_ucode_validate(const struct firmware *fw)
+{
+ const struct common_firmware_header *hdr =
+ (const struct common_firmware_header *)fw->data;
+
+ if (fw->size == le32_to_cpu(hdr->size_bytes))
+ return 0;
+
+ return -EINVAL;
+}
+
+bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
+ uint16_t hdr_major, uint16_t hdr_minor)
+{
+ if ((hdr->common.header_version_major == hdr_major) &&
+ (hdr->common.header_version_minor == hdr_minor))
+ return true;
+ return false;
+}
+
+enum amdgpu_firmware_load_type
+amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
+{
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ return AMDGPU_FW_LOAD_DIRECT;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_HAWAII:
+ case CHIP_MULLINS:
+ return AMDGPU_FW_LOAD_DIRECT;
+#endif
+ case CHIP_TOPAZ:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ return AMDGPU_FW_LOAD_SMU;
+ case CHIP_CYAN_SKILLFISH:
+ if (!(load_type &&
+ adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2))
+ return AMDGPU_FW_LOAD_DIRECT;
+ else
+ return AMDGPU_FW_LOAD_PSP;
+ default:
+ if (!load_type)
+ return AMDGPU_FW_LOAD_DIRECT;
+ else if (load_type == 3)
+ return AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO;
+ else
+ return AMDGPU_FW_LOAD_PSP;
+ }
+}
+
+const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
+{
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ return "SDMA0";
+ case AMDGPU_UCODE_ID_SDMA1:
+ return "SDMA1";
+ case AMDGPU_UCODE_ID_SDMA2:
+ return "SDMA2";
+ case AMDGPU_UCODE_ID_SDMA3:
+ return "SDMA3";
+ case AMDGPU_UCODE_ID_SDMA4:
+ return "SDMA4";
+ case AMDGPU_UCODE_ID_SDMA5:
+ return "SDMA5";
+ case AMDGPU_UCODE_ID_SDMA6:
+ return "SDMA6";
+ case AMDGPU_UCODE_ID_SDMA7:
+ return "SDMA7";
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ return "SDMA_CTX";
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ return "SDMA_CTL";
+ case AMDGPU_UCODE_ID_CP_CE:
+ return "CP_CE";
+ case AMDGPU_UCODE_ID_CP_PFP:
+ return "CP_PFP";
+ case AMDGPU_UCODE_ID_CP_ME:
+ return "CP_ME";
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ return "CP_MEC1";
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ return "CP_MEC1_JT";
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ return "CP_MEC2";
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ return "CP_MEC2_JT";
+ case AMDGPU_UCODE_ID_CP_MES:
+ return "CP_MES";
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ return "CP_MES_DATA";
+ case AMDGPU_UCODE_ID_CP_MES1:
+ return "CP_MES_KIQ";
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ return "CP_MES_KIQ_DATA";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ return "RLC_RESTORE_LIST_CNTL";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ return "RLC_RESTORE_LIST_GPM_MEM";
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ return "RLC_RESTORE_LIST_SRM_MEM";
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ return "RLC_IRAM";
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ return "RLC_DRAM";
+ case AMDGPU_UCODE_ID_RLC_G:
+ return "RLC_G";
+ case AMDGPU_UCODE_ID_RLC_P:
+ return "RLC_P";
+ case AMDGPU_UCODE_ID_RLC_V:
+ return "RLC_V";
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ return "GLOBAL_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ return "SE0_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ return "SE1_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ return "SE2_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ return "SE3_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_IMU_I:
+ return "IMU_I";
+ case AMDGPU_UCODE_ID_IMU_D:
+ return "IMU_D";
+ case AMDGPU_UCODE_ID_STORAGE:
+ return "STORAGE";
+ case AMDGPU_UCODE_ID_SMC:
+ return "SMC";
+ case AMDGPU_UCODE_ID_PPTABLE:
+ return "PPTABLE";
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ return "P2STABLE";
+ case AMDGPU_UCODE_ID_UVD:
+ return "UVD";
+ case AMDGPU_UCODE_ID_UVD1:
+ return "UVD1";
+ case AMDGPU_UCODE_ID_VCE:
+ return "VCE";
+ case AMDGPU_UCODE_ID_VCN:
+ return "VCN";
+ case AMDGPU_UCODE_ID_VCN1:
+ return "VCN1";
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ return "DMCU_ERAM";
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ return "DMCU_INTV";
+ case AMDGPU_UCODE_ID_VCN0_RAM:
+ return "VCN0_RAM";
+ case AMDGPU_UCODE_ID_VCN1_RAM:
+ return "VCN1_RAM";
+ case AMDGPU_UCODE_ID_DMCUB:
+ return "DMCUB";
+ case AMDGPU_UCODE_ID_CAP:
+ return "CAP";
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ return "VPE_CTX";
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ return "VPE_CTL";
+ case AMDGPU_UCODE_ID_VPE:
+ return "VPE";
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ return "UMSCH_MM_UCODE";
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ return "UMSCH_MM_DATA";
+ case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER:
+ return "UMSCH_MM_CMD_BUFFER";
+ case AMDGPU_UCODE_ID_JPEG_RAM:
+ return "JPEG";
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ return "RS64_SDMA";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ return "RS64_PFP";
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ return "RS64_ME";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ return "RS64_MEC";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ return "RS64_PFP_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ return "RS64_PFP_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ return "RS64_ME_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ return "RS64_ME_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ return "RS64_MEC_P0_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ return "RS64_MEC_P1_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ return "RS64_MEC_P2_STACK";
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ return "RS64_MEC_P3_STACK";
+ case AMDGPU_UCODE_ID_ISP:
+ return "ISP";
+ default:
+ return "UNKNOWN UCODE";
+ }
+}
+
+static inline int amdgpu_ucode_is_valid(uint32_t fw_version)
+{
+ if (!fw_version)
+ return -EINVAL;
+
+ return 0;
+}
+
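+/*
+ * FW_VERSION_ATTR - declare a read-only sysfs attribute for a firmware
+ * version field.
+ *
+ * The generated show() helper doubles as a validity check: when called with
+ * a NULL buffer from amdgpu_ucode_sys_visible() it only reports whether the
+ * version field is non-zero, so attributes whose version is still zero stay
+ * hidden.
+ */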
+#define FW_VERSION_ATTR(name, mode, field) \
+static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct drm_device *ddev = dev_get_drvdata(dev); \
+ struct amdgpu_device *adev = drm_to_adev(ddev); \
+ \
+ if (!buf) \
+ return amdgpu_ucode_is_valid(adev->field); \
+ \
+ return sysfs_emit(buf, "0x%08x\n", adev->field); \
+} \
+static DEVICE_ATTR(name, mode, show_##name, NULL)
+
+FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
+FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
+FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
+FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
+FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
+FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
+FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
+FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
+FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
+FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
+FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
+FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(imu_fw_version, 0444, gfx.imu_fw_version);
+FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version);
+FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_context.bin_desc.fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras_context.context.bin_desc.fw_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.xgmi_context.context.bin_desc.fw_version);
+FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
+FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
+FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
+FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
+FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+FW_VERSION_ATTR(dmcub_fw_version, 0444, dm.dmcub_fw_version);
+FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(pldm_fw_version, 0444, firmware.pldm_version);
+
+static struct attribute *fw_attrs[] = {
+ &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
+ &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
+ &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
+ &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
+ &dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
+ &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
+ &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
+ &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
+ &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
+ &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
+ &dev_attr_dmcu_fw_version.attr, &dev_attr_dmcub_fw_version.attr,
+ &dev_attr_imu_fw_version.attr, &dev_attr_mes_fw_version.attr,
+ &dev_attr_mes_kiq_fw_version.attr, &dev_attr_pldm_fw_version.attr,
+ NULL
+};
+
+#define to_dev_attr(x) container_of(x, struct device_attribute, attr)
+
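+ /*
+ * Hide fw_version attributes whose underlying firmware is not present on
+ * this ASIC (i.e. the probed version field is zero).
+ */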
+static umode_t amdgpu_ucode_sys_visible(struct kobject *kobj,
+ struct attribute *attr, int idx)
+{
+ struct device_attribute *dev_attr = to_dev_attr(attr);
+ struct device *dev = kobj_to_dev(kobj);
+
+ if (dev_attr->show(dev, dev_attr, NULL) == -EINVAL)
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group fw_attr_group = {
+ .name = "fw_version",
+ .attrs = fw_attrs,
+ .is_visible = amdgpu_ucode_sys_visible
+};
+
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
+{
+ return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
+}
+
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
+{
+ sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
+}
+
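+ /*
+ * Locate a single ucode image inside its firmware binary (the offset and
+ * size depend on the ucode ID and header version) and copy it into the
+ * firmware buffer object at the given MC address / kernel address.
+ */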
+static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
+ struct amdgpu_firmware_info *ucode,
+ uint64_t mc_addr, void *kptr)
+{
+ const struct common_firmware_header *header = NULL;
+ const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr = NULL;
+ const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
+ const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
+ const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
+ const struct sdma_firmware_header_v3_0 *sdmav3_hdr = NULL;
+ const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
+ const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL;
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL;
+ u8 *ucode_addr;
+
+ if (!ucode->fw)
+ return 0;
+
+ ucode->mc_addr = mc_addr;
+ ucode->kaddr = kptr;
+
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_STORAGE)
+ return 0;
+
+ header = (const struct common_firmware_header *)ucode->fw->data;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)ucode->fw->data;
+ dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
+ dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data;
+ sdmav3_hdr = (const struct sdma_firmware_header_v3_0 *)ucode->fw->data;
+ imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data;
+ vpe_hdr = (const struct vpe_firmware_header_v1_0 *)ucode->fw->data;
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
+ ucode->ucode_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
+ ucode->ucode_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdma_hdr->ctl_ucode_offset);
+ break;
+ case AMDGPU_UCODE_ID_SDMA_RS64:
+ ucode->ucode_size = le32_to_cpu(sdmav3_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(sdmav3_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ ucode->ucode_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes) +
+ le32_to_cpu(cp_hdr->jt_offset) * 4;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_cntl;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_gpm;
+ break;
+ case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+ ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+ ucode_addr = adev->gfx.rlc.save_restore_list_srm;
+ break;
+ case AMDGPU_UCODE_ID_RLC_IRAM:
+ ucode->ucode_size = adev->gfx.rlc.rlc_iram_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlc_iram_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_DRAM:
+ ucode->ucode_size = adev->gfx.rlc.rlc_dram_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlc_dram_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_P:
+ ucode->ucode_size = adev->gfx.rlc.rlcp_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlcp_ucode;
+ break;
+ case AMDGPU_UCODE_ID_RLC_V:
+ ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.rlcv_ucode;
+ break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.global_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_CP_MES:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES_DATA:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MES1_DATA:
+ ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(dmcu_hdr->intv_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes) +
+ le32_to_cpu(dmcu_hdr->intv_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_DMCUB:
+ ucode->ucode_size = le32_to_cpu(dmcub_hdr->inst_const_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_PPTABLE:
+ ucode->ucode_size = ucode->fw->size;
+ ucode_addr = (u8 *)ucode->fw->data;
+ break;
+ case AMDGPU_UCODE_ID_P2S_TABLE:
+ ucode->ucode_size = ucode->fw->size;
+ ucode_addr = (u8 *)ucode->fw->data;
+ break;
+ case AMDGPU_UCODE_ID_IMU_I:
+ ucode->ucode_size = le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_IMU_D:
+ ucode->ucode_size = le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(imu_hdr->header.ucode_array_offset_bytes) +
+ le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ ucode->ucode_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTX:
+ ucode->ucode_size = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_VPE_CTL:
+ ucode->ucode_size = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(vpe_hdr->ctl_ucode_offset);
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_UCODE:
+ ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(umsch_mm_hdr->header.ucode_array_offset_bytes);
+ break;
+ case AMDGPU_UCODE_ID_UMSCH_MM_DATA:
+ ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes);
+ break;
+ default:
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ break;
+ }
+ } else {
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
+ ucode_addr = (u8 *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes);
+ }
+
+ memcpy(ucode->kaddr, ucode_addr, ucode->ucode_size);
+
+ return 0;
+}
+
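+ /*
+ * Copy the jump table of a firmware image into the page-aligned area that
+ * follows the main ucode in the firmware buffer object.
+ */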
+static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
+ uint64_t mc_addr, void *kptr)
+{
+ const struct gfx_firmware_header_v1_0 *header = NULL;
+ const struct common_firmware_header *comm_hdr = NULL;
+ uint8_t *src_addr = NULL;
+ uint8_t *dst_addr = NULL;
+
+ if (!ucode->fw)
+ return 0;
+
+ comm_hdr = (const struct common_firmware_header *)ucode->fw->data;
+ header = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ dst_addr = ucode->kaddr +
+ ALIGN(le32_to_cpu(comm_hdr->ucode_size_bytes),
+ PAGE_SIZE);
+ src_addr = (uint8_t *)ucode->fw->data +
+ le32_to_cpu(comm_hdr->ucode_array_offset_bytes) +
+ (le32_to_cpu(header->jt_offset) * 4);
+ memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4);
+
+ return 0;
+}
+
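+ /*
+ * Allocate the buffer object that backs all firmware images. VRAM is used
+ * for SR-IOV (or when debug_use_vram_fw_buf is set), GTT otherwise; the
+ * direct and RLC-backdoor load types do not need the buffer at all.
+ */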
+int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
+{
+ if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) &&
+ (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) {
+ amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
+ (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ &adev->firmware.fw_buf,
+ &adev->firmware.fw_buf_mc,
+ &adev->firmware.fw_buf_ptr);
+ if (!adev->firmware.fw_buf) {
+ dev_err(adev->dev, "failed to create kernel buffer for firmware.fw_buf\n");
+ return -ENOMEM;
+ } else if (amdgpu_sriov_vf(adev)) {
+ memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
+ }
+ }
+ return 0;
+}
+
+void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
+ &adev->firmware.fw_buf_mc,
+ &adev->firmware.fw_buf_ptr);
+}
+
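+ /*
+ * Copy every requested ucode image into the firmware buffer object,
+ * packing them at page-aligned offsets (plus a jump-table page for MEC1
+ * when firmware is not loaded through the PSP).
+ */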
+int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
+{
+ uint64_t fw_offset = 0;
+ int i;
+ struct amdgpu_firmware_info *ucode = NULL;
+
+ /* for bare metal, the ucode is allocated in GTT, so there is no need to refill the BO on reset/suspend */
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_in_reset(adev) || adev->in_suspend))
+ return 0;
+ /*
+ * if the SMU loads the firmware, there is no need to add SMC, UVD and
+ * VCE ucode info here
+ */
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ if (amdgpu_sriov_vf(adev))
+ adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 3;
+ else
+ adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4;
+ } else {
+ adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM;
+ }
+
+ if (amdgpu_virt_xgmi_migrate_enabled(adev) && adev->firmware.fw_buf)
+ adev->firmware.fw_buf_mc = amdgpu_bo_fb_aper_addr(adev->firmware.fw_buf);
+
+ for (i = 0; i < adev->firmware.max_ucodes; i++) {
+ ucode = &adev->firmware.ucode[i];
+ if (ucode->fw) {
+ amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset,
+ adev->firmware.fw_buf_ptr + fw_offset);
+ if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
+ adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset,
+ adev->firmware.fw_buf_ptr + fw_offset);
+ fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
+ }
+ fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE);
+ }
+ }
+ return 0;
+}
+
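+ /*
+ * Map an IP version of the given block to the legacy ASIC-name based
+ * firmware prefix (e.g. "navi10", "aldebaran_smc"); returns NULL when the
+ * generic "<ip>_<major>_<minor>_<rev>" naming applies.
+ */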
+static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int block_type)
+{
+ if (block_type == MP0_HWIP) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "vega10";
+ case CHIP_VEGA12:
+ return "vega12";
+ default:
+ return NULL;
+ }
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso";
+ return "raven";
+ }
+ break;
+ case IP_VERSION(11, 0, 0):
+ return "navi10";
+ case IP_VERSION(11, 0, 2):
+ return "vega20";
+ case IP_VERSION(11, 0, 3):
+ return "renoir";
+ case IP_VERSION(11, 0, 4):
+ return "arcturus";
+ case IP_VERSION(11, 0, 5):
+ return "navi14";
+ case IP_VERSION(11, 0, 7):
+ return "sienna_cichlid";
+ case IP_VERSION(11, 0, 9):
+ return "navi12";
+ case IP_VERSION(11, 0, 11):
+ return "navy_flounder";
+ case IP_VERSION(11, 0, 12):
+ return "dimgrey_cavefish";
+ case IP_VERSION(11, 0, 13):
+ return "beige_goby";
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
+ return "vangogh";
+ case IP_VERSION(12, 0, 1):
+ return "green_sardine";
+ case IP_VERSION(13, 0, 2):
+ return "aldebaran";
+ case IP_VERSION(13, 0, 1):
+ case IP_VERSION(13, 0, 3):
+ return "yellow_carp";
+ }
+ } else if (block_type == MP1_HWIP) {
+ switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(10, 0, 0):
+ case IP_VERSION(10, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ if (adev->asic_type == CHIP_ARCTURUS)
+ return "arcturus_smc";
+ return NULL;
+ case IP_VERSION(11, 0, 0):
+ return "navi10_smc";
+ case IP_VERSION(11, 0, 5):
+ return "navi14_smc";
+ case IP_VERSION(11, 0, 9):
+ return "navi12_smc";
+ case IP_VERSION(11, 0, 7):
+ return "sienna_cichlid_smc";
+ case IP_VERSION(11, 0, 11):
+ return "navy_flounder_smc";
+ case IP_VERSION(11, 0, 12):
+ return "dimgrey_cavefish_smc";
+ case IP_VERSION(11, 0, 13):
+ return "beige_goby_smc";
+ case IP_VERSION(13, 0, 2):
+ return "aldebaran_smc";
+ }
+ } else if (block_type == SDMA0_HWIP) {
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 0, 0):
+ return "vega10_sdma";
+ case IP_VERSION(4, 0, 1):
+ return "vega12_sdma";
+ case IP_VERSION(4, 1, 0):
+ case IP_VERSION(4, 1, 1):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2_sdma";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso_sdma";
+ return "raven_sdma";
+ case IP_VERSION(4, 1, 2):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir_sdma";
+ return "green_sardine_sdma";
+ case IP_VERSION(4, 2, 0):
+ return "vega20_sdma";
+ case IP_VERSION(4, 2, 2):
+ return "arcturus_sdma";
+ case IP_VERSION(4, 4, 0):
+ return "aldebaran_sdma";
+ case IP_VERSION(5, 0, 0):
+ return "navi10_sdma";
+ case IP_VERSION(5, 0, 1):
+ return "cyan_skillfish2_sdma";
+ case IP_VERSION(5, 0, 2):
+ return "navi14_sdma";
+ case IP_VERSION(5, 0, 5):
+ return "navi12_sdma";
+ case IP_VERSION(5, 2, 0):
+ return "sienna_cichlid_sdma";
+ case IP_VERSION(5, 2, 2):
+ return "navy_flounder_sdma";
+ case IP_VERSION(5, 2, 4):
+ return "dimgrey_cavefish_sdma";
+ case IP_VERSION(5, 2, 5):
+ return "beige_goby_sdma";
+ case IP_VERSION(5, 2, 3):
+ return "yellow_carp_sdma";
+ case IP_VERSION(5, 2, 1):
+ return "vangogh_sdma";
+ }
+ } else if (block_type == UVD_HWIP) {
+ switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
+ case IP_VERSION(1, 0, 0):
+ case IP_VERSION(1, 0, 1):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2_vcn";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso_vcn";
+ return "raven_vcn";
+ case IP_VERSION(2, 5, 0):
+ return "arcturus_vcn";
+ case IP_VERSION(2, 2, 0):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir_vcn";
+ return "green_sardine_vcn";
+ case IP_VERSION(2, 6, 0):
+ return "aldebaran_vcn";
+ case IP_VERSION(2, 0, 0):
+ return "navi10_vcn";
+ case IP_VERSION(2, 0, 2):
+ if (adev->asic_type == CHIP_NAVI12)
+ return "navi12_vcn";
+ return "navi14_vcn";
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 64):
+ case IP_VERSION(3, 0, 192):
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0))
+ return "sienna_cichlid_vcn";
+ return "navy_flounder_vcn";
+ case IP_VERSION(3, 0, 2):
+ return "vangogh_vcn";
+ case IP_VERSION(3, 0, 16):
+ return "dimgrey_cavefish_vcn";
+ case IP_VERSION(3, 0, 33):
+ return "beige_goby_vcn";
+ case IP_VERSION(3, 1, 1):
+ return "yellow_carp_vcn";
+ }
+ } else if (block_type == GC_HWIP) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ return "vega10";
+ case IP_VERSION(9, 2, 1):
+ return "vega12";
+ case IP_VERSION(9, 4, 0):
+ return "vega20";
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "raven2";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "picasso";
+ return "raven";
+ case IP_VERSION(9, 4, 1):
+ return "arcturus";
+ case IP_VERSION(9, 3, 0):
+ if (adev->apu_flags & AMD_APU_IS_RENOIR)
+ return "renoir";
+ return "green_sardine";
+ case IP_VERSION(9, 4, 2):
+ return "aldebaran";
+ case IP_VERSION(10, 1, 10):
+ return "navi10";
+ case IP_VERSION(10, 1, 1):
+ return "navi14";
+ case IP_VERSION(10, 1, 2):
+ return "navi12";
+ case IP_VERSION(10, 3, 0):
+ return "sienna_cichlid";
+ case IP_VERSION(10, 3, 2):
+ return "navy_flounder";
+ case IP_VERSION(10, 3, 1):
+ return "vangogh";
+ case IP_VERSION(10, 3, 4):
+ return "dimgrey_cavefish";
+ case IP_VERSION(10, 3, 5):
+ return "beige_goby";
+ case IP_VERSION(10, 3, 3):
+ return "yellow_carp";
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ return "cyan_skillfish2";
+ }
+ }
+ return NULL;
+}
+
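+ /*
+ * Check whether this PCI device/revision pair matches an entry in
+ * kicker_device_list, i.e. whether the part takes the kicker firmware
+ * variants.
+ */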
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) {
+ if (adev->pdev->device == kicker_device_list[i].device &&
+ adev->pdev->revision == kicker_device_list[i].revision)
+ return true;
+ }
+
+ return false;
+}
+
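+ /*
+ * Build the firmware name prefix for a hardware block: the legacy ASIC
+ * name when one exists, otherwise "<ip>_<major>_<minor>_<rev>".
+ */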
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len)
+{
+ int maj, min, rev;
+ char *ip_name;
+ const char *legacy;
+ uint32_t version = amdgpu_ip_version(adev, block_type, 0);
+
+ legacy = amdgpu_ucode_legacy_naming(adev, block_type);
+ if (legacy) {
+ snprintf(ucode_prefix, len, "%s", legacy);
+ return;
+ }
+
+ switch (block_type) {
+ case GC_HWIP:
+ ip_name = "gc";
+ break;
+ case SDMA0_HWIP:
+ ip_name = "sdma";
+ break;
+ case MP0_HWIP:
+ ip_name = "psp";
+ break;
+ case MP1_HWIP:
+ ip_name = "smu";
+ break;
+ case UVD_HWIP:
+ ip_name = "vcn";
+ break;
+ case VPE_HWIP:
+ ip_name = "vpe";
+ break;
+ case ISP_HWIP:
+ ip_name = "isp";
+ break;
+ default:
+ BUG();
+ }
+
+ maj = IP_VERSION_MAJ(version);
+ min = IP_VERSION_MIN(version);
+ rev = IP_VERSION_REV(version);
+
+ snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);
+}
+
+/*
+ * amdgpu_ucode_request - Fetch and validate amdgpu microcode
+ *
+ * @adev: amdgpu device
+ * @fw: pointer to load firmware to
+ * @required: whether the firmware is required
+ * @fmt: firmware name format string
+ * @...: variable arguments
+ *
+ * This is a helper that uses request_firmware and amdgpu_ucode_validate
+ * to load and run basic validation on the firmware. If the load fails, the
+ * error code is remapped to -ENODEV so that early_init fails and the IP
+ * block is not loaded.
+ */
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
+ enum amdgpu_ucode_required required, const char *fmt, ...)
+{
+ char fname[AMDGPU_UCODE_NAME_MAX];
+ va_list ap;
+ int r;
+
+ va_start(ap, fmt);
+ r = vsnprintf(fname, sizeof(fname), fmt, ap);
+ va_end(ap);
+ if (r >= sizeof(fname)) {
+ dev_warn(adev->dev, "amdgpu firmware name buffer overflow\n");
+ return -EOVERFLOW;
+ }
+
+ if (required == AMDGPU_UCODE_REQUIRED)
+ r = request_firmware(fw, fname, adev->dev);
+ else {
+ r = firmware_request_nowarn(fw, fname, adev->dev);
+ if (r)
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname);
+ }
+ if (r)
+ return -ENODEV;
+
+ r = amdgpu_ucode_validate(*fw);
+ if (r)
+ /*
+ * amdgpu_ucode_request() should be paired with amdgpu_ucode_release()
+ * regardless of success or failure; amdgpu_ucode_release() takes care of
+ * releasing the firmware, so avoid a redundant release here.
+ */
+ dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname);
+
+ return r;
+}
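+
+ /*
+ * Typical caller pattern (illustrative sketch only, not part of this
+ * file; the "foo" block and firmware name are hypothetical):
+ *
+ * r = amdgpu_ucode_request(adev, &adev->foo.fw, AMDGPU_UCODE_REQUIRED,
+ * "amdgpu/%s_foo.bin", ucode_prefix);
+ * if (r)
+ * amdgpu_ucode_release(&adev->foo.fw);
+ * return r;
+ *
+ * i.e. every amdgpu_ucode_request() is paired with an
+ * amdgpu_ucode_release(), even when the request fails.
+ */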
+
+/*
+ * amdgpu_ucode_release - Release firmware microcode
+ *
+ * @fw: pointer to firmware to release
+ */
+void amdgpu_ucode_release(const struct firmware **fw)
+{
+ release_firmware(*fw);
+ *fw = NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
new file mode 100644
index 000000000000..6349aad6da35
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -0,0 +1,642 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_UCODE_H__
+#define __AMDGPU_UCODE_H__
+
+#include "amdgpu_socbb.h"
+
+#define RS64_FW_UC_START_ADDR_LO 0x3000
+
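+ /*
+ * Common header placed at the start of every amdgpu firmware binary;
+ * the version-specific headers below embed it as their first member.
+ */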
+struct common_firmware_header {
+ uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
+ uint32_t header_size_bytes; /* size of just the header in bytes */
+ uint16_t header_version_major; /* header version */
+ uint16_t header_version_minor; /* header version */
+ uint16_t ip_version_major; /* IP version */
+ uint16_t ip_version_minor; /* IP version */
+ uint32_t ucode_version;
+ uint32_t ucode_size_bytes; /* size of ucode in bytes */
+ uint32_t ucode_array_offset_bytes; /* payload offset from the start of the header */
+ uint32_t crc32; /* crc32 checksum of the payload */
+};
+
+/* version_major=1, version_minor=0 */
+struct mc_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t io_debug_size_bytes; /* size of debug array in dwords */
+ uint32_t io_debug_array_offset_bytes; /* payload offset from the start of the header */
+};
+
+/* version_major=1, version_minor=0 */
+struct smc_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_start_addr;
+};
+
+/* version_major=2, version_minor=0 */
+struct smc_firmware_header_v2_0 {
+ struct smc_firmware_header_v1_0 v1_0;
+ uint32_t ppt_offset_bytes; /* soft pptable offset */
+ uint32_t ppt_size_bytes; /* soft pptable size */
+};
+
+struct smc_soft_pptable_entry {
+ uint32_t id;
+ uint32_t ppt_offset_bytes;
+ uint32_t ppt_size_bytes;
+};
+
+/* version_major=2, version_minor=1 */
+struct smc_firmware_header_v2_1 {
+ struct smc_firmware_header_v1_0 v1_0;
+ uint32_t pptable_count;
+ uint32_t pptable_entry_offset;
+};
+
+struct psp_fw_legacy_bin_desc {
+ uint32_t fw_version;
+ uint32_t offset_bytes;
+ uint32_t size_bytes;
+};
+
+/* version_major=1, version_minor=0 */
+struct psp_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ struct psp_fw_legacy_bin_desc sos;
+};
+
+/* version_major=1, version_minor=1 */
+struct psp_firmware_header_v1_1 {
+ struct psp_firmware_header_v1_0 v1_0;
+ struct psp_fw_legacy_bin_desc toc;
+ struct psp_fw_legacy_bin_desc kdb;
+};
+
+/* version_major=1, version_minor=2 */
+struct psp_firmware_header_v1_2 {
+ struct psp_firmware_header_v1_0 v1_0;
+ struct psp_fw_legacy_bin_desc res;
+ struct psp_fw_legacy_bin_desc kdb;
+};
+
+/* version_major=1, version_minor=3 */
+struct psp_firmware_header_v1_3 {
+ struct psp_firmware_header_v1_1 v1_1;
+ struct psp_fw_legacy_bin_desc spl;
+ struct psp_fw_legacy_bin_desc rl;
+ struct psp_fw_legacy_bin_desc sys_drv_aux;
+ struct psp_fw_legacy_bin_desc sos_aux;
+};
+
+struct psp_fw_bin_desc {
+ uint32_t fw_type;
+ uint32_t fw_version;
+ uint32_t offset_bytes;
+ uint32_t size_bytes;
+};
+
+enum psp_fw_type {
+ PSP_FW_TYPE_UNKOWN,
+ PSP_FW_TYPE_PSP_SOS,
+ PSP_FW_TYPE_PSP_SYS_DRV,
+ PSP_FW_TYPE_PSP_KDB,
+ PSP_FW_TYPE_PSP_TOC,
+ PSP_FW_TYPE_PSP_SPL,
+ PSP_FW_TYPE_PSP_RL,
+ PSP_FW_TYPE_PSP_SOC_DRV,
+ PSP_FW_TYPE_PSP_INTF_DRV,
+ PSP_FW_TYPE_PSP_DBG_DRV,
+ PSP_FW_TYPE_PSP_RAS_DRV,
+ PSP_FW_TYPE_PSP_IPKEYMGR_DRV,
+ PSP_FW_TYPE_PSP_SPDM_DRV,
+ PSP_FW_TYPE_MAX_INDEX,
+};
+
+/* version_major=2, version_minor=0 */
+struct psp_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t psp_fw_bin_count;
+ struct psp_fw_bin_desc psp_fw_bin[];
+};
+
+/* version_major=2, version_minor=1 */
+struct psp_firmware_header_v2_1 {
+ struct common_firmware_header header;
+ uint32_t psp_fw_bin_count;
+ uint32_t psp_aux_fw_bin_index;
+ struct psp_fw_bin_desc psp_fw_bin[];
+};
+
+/* version_major=1, version_minor=0 */
+struct ta_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ struct psp_fw_legacy_bin_desc xgmi;
+ struct psp_fw_legacy_bin_desc ras;
+ struct psp_fw_legacy_bin_desc hdcp;
+ struct psp_fw_legacy_bin_desc dtm;
+ struct psp_fw_legacy_bin_desc securedisplay;
+};
+
+enum ta_fw_type {
+ TA_FW_TYPE_UNKOWN,
+ TA_FW_TYPE_PSP_ASD,
+ TA_FW_TYPE_PSP_XGMI,
+ TA_FW_TYPE_PSP_RAS,
+ TA_FW_TYPE_PSP_HDCP,
+ TA_FW_TYPE_PSP_DTM,
+ TA_FW_TYPE_PSP_RAP,
+ TA_FW_TYPE_PSP_SECUREDISPLAY,
+ TA_FW_TYPE_PSP_XGMI_AUX,
+ TA_FW_TYPE_MAX_INDEX,
+};
+
+/* version_major=2, version_minor=0 */
+struct ta_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ta_fw_bin_count;
+ struct psp_fw_bin_desc ta_fw_bin[];
+};
+
+/* version_major=1, version_minor=0 */
+struct gfx_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t jt_offset; /* jt location */
+ uint32_t jt_size; /* size of jt */
+};
+
+/* version_major=2, version_minor=0 */
+struct gfx_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_size_bytes;
+ uint32_t ucode_offset_bytes;
+ uint32_t data_size_bytes;
+ uint32_t data_offset_bytes;
+ uint32_t ucode_start_addr_lo;
+ uint32_t ucode_start_addr_hi;
+};
+
+/* version_major=1, version_minor=0 */
+struct mes_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t mes_ucode_version;
+ uint32_t mes_ucode_size_bytes;
+ uint32_t mes_ucode_offset_bytes;
+ uint32_t mes_ucode_data_version;
+ uint32_t mes_ucode_data_size_bytes;
+ uint32_t mes_ucode_data_offset_bytes;
+ uint32_t mes_uc_start_addr_lo;
+ uint32_t mes_uc_start_addr_hi;
+ uint32_t mes_data_start_addr_lo;
+ uint32_t mes_data_start_addr_hi;
+};
+
+/* version_major=1, version_minor=0 */
+struct rlc_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t save_and_restore_offset;
+ uint32_t clear_state_descriptor_offset;
+ uint32_t avail_scratch_ram_locations;
+ uint32_t master_pkt_description_offset;
+};
+
+/* version_major=2, version_minor=0 */
+struct rlc_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t jt_offset; /* jt location */
+ uint32_t jt_size; /* size of jt */
+ uint32_t save_and_restore_offset;
+ uint32_t clear_state_descriptor_offset;
+ uint32_t avail_scratch_ram_locations;
+ uint32_t reg_restore_list_size;
+ uint32_t reg_list_format_start;
+ uint32_t reg_list_format_separate_start;
+ uint32_t starting_offsets_start;
+ uint32_t reg_list_format_size_bytes; /* size of reg list format array in bytes */
+ uint32_t reg_list_format_array_offset_bytes; /* payload offset from the start of the header */
+ uint32_t reg_list_size_bytes; /* size of reg list array in bytes */
+ uint32_t reg_list_array_offset_bytes; /* payload offset from the start of the header */
+ uint32_t reg_list_format_separate_size_bytes; /* size of reg list format array in bytes */
+ uint32_t reg_list_format_separate_array_offset_bytes; /* payload offset from the start of the header */
+ uint32_t reg_list_separate_size_bytes; /* size of reg list array in bytes */
+ uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
+};
+
+/* version_major=2, version_minor=1 */
+struct rlc_firmware_header_v2_1 {
+ struct rlc_firmware_header_v2_0 v2_0;
+ uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */
+ uint32_t save_restore_list_cntl_ucode_ver;
+ uint32_t save_restore_list_cntl_feature_ver;
+ uint32_t save_restore_list_cntl_size_bytes;
+ uint32_t save_restore_list_cntl_offset_bytes;
+ uint32_t save_restore_list_gpm_ucode_ver;
+ uint32_t save_restore_list_gpm_feature_ver;
+ uint32_t save_restore_list_gpm_size_bytes;
+ uint32_t save_restore_list_gpm_offset_bytes;
+ uint32_t save_restore_list_srm_ucode_ver;
+ uint32_t save_restore_list_srm_feature_ver;
+ uint32_t save_restore_list_srm_size_bytes;
+ uint32_t save_restore_list_srm_offset_bytes;
+};
+
+/* version_major=2, version_minor=2 */
+struct rlc_firmware_header_v2_2 {
+ struct rlc_firmware_header_v2_1 v2_1;
+ uint32_t rlc_iram_ucode_size_bytes;
+ uint32_t rlc_iram_ucode_offset_bytes;
+ uint32_t rlc_dram_ucode_size_bytes;
+ uint32_t rlc_dram_ucode_offset_bytes;
+};
+
+/* version_major=2, version_minor=3 */
+struct rlc_firmware_header_v2_3 {
+ struct rlc_firmware_header_v2_2 v2_2;
+ uint32_t rlcp_ucode_version;
+ uint32_t rlcp_ucode_feature_version;
+ uint32_t rlcp_ucode_size_bytes;
+ uint32_t rlcp_ucode_offset_bytes;
+ uint32_t rlcv_ucode_version;
+ uint32_t rlcv_ucode_feature_version;
+ uint32_t rlcv_ucode_size_bytes;
+ uint32_t rlcv_ucode_offset_bytes;
+};
+
+/* version_major=2, version_minor=4 */
+struct rlc_firmware_header_v2_4 {
+ struct rlc_firmware_header_v2_3 v2_3;
+ uint32_t global_tap_delays_ucode_size_bytes;
+ uint32_t global_tap_delays_ucode_offset_bytes;
+ uint32_t se0_tap_delays_ucode_size_bytes;
+ uint32_t se0_tap_delays_ucode_offset_bytes;
+ uint32_t se1_tap_delays_ucode_size_bytes;
+ uint32_t se1_tap_delays_ucode_offset_bytes;
+ uint32_t se2_tap_delays_ucode_size_bytes;
+ uint32_t se2_tap_delays_ucode_offset_bytes;
+ uint32_t se3_tap_delays_ucode_size_bytes;
+ uint32_t se3_tap_delays_ucode_offset_bytes;
+};
+
+/* version_major=1, version_minor=0 */
+struct sdma_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_change_version;
+ uint32_t jt_offset; /* jt location */
+ uint32_t jt_size; /* size of jt */
+};
+
+/* version_major=1, version_minor=1 */
+struct sdma_firmware_header_v1_1 {
+ struct sdma_firmware_header_v1_0 v1_0;
+ uint32_t digest_size;
+};
+
+/* version_major=2, version_minor=0 */
+struct sdma_firmware_header_v2_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
+ uint32_t ctx_jt_offset; /* context thread jt location */
+ uint32_t ctx_jt_size; /* context thread size of jt */
+ uint32_t ctl_ucode_offset;
+ uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
+ uint32_t ctl_jt_offset; /* control thread jt location */
+ uint32_t ctl_jt_size; /* control thread size of jt */
+};
+
+/* version_major=1, version_minor=0 */
+struct vpe_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ctx_ucode_size_bytes; /* context thread ucode size */
+ uint32_t ctx_jt_offset; /* context thread jt location */
+ uint32_t ctx_jt_size; /* context thread size of jt */
+ uint32_t ctl_ucode_offset;
+ uint32_t ctl_ucode_size_bytes; /* control thread ucode size */
+ uint32_t ctl_jt_offset; /* control thread jt location */
+ uint32_t ctl_jt_size; /* control thread size of jt */
+};
+
+/* version_major=1, version_minor=0 */
+struct umsch_mm_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t umsch_mm_ucode_version;
+ uint32_t umsch_mm_ucode_size_bytes;
+ uint32_t umsch_mm_ucode_offset_bytes;
+ uint32_t umsch_mm_ucode_data_version;
+ uint32_t umsch_mm_ucode_data_size_bytes;
+ uint32_t umsch_mm_ucode_data_offset_bytes;
+ uint32_t umsch_mm_irq_start_addr_lo;
+ uint32_t umsch_mm_irq_start_addr_hi;
+ uint32_t umsch_mm_uc_start_addr_lo;
+ uint32_t umsch_mm_uc_start_addr_hi;
+ uint32_t umsch_mm_data_start_addr_lo;
+ uint32_t umsch_mm_data_start_addr_hi;
+};
+
+/* version_major=3, version_minor=0 */
+struct sdma_firmware_header_v3_0 {
+ struct common_firmware_header header;
+ uint32_t ucode_feature_version;
+ uint32_t ucode_offset_bytes;
+ uint32_t ucode_size_bytes;
+};
+
+/* gpu info payload */
+struct gpu_info_firmware_v1_0 {
+ uint32_t gc_num_se;
+ uint32_t gc_num_cu_per_sh;
+ uint32_t gc_num_sh_per_se;
+ uint32_t gc_num_rb_per_se;
+ uint32_t gc_num_tccs;
+ uint32_t gc_num_gprs;
+ uint32_t gc_num_max_gs_thds;
+ uint32_t gc_gs_table_depth;
+ uint32_t gc_gsprim_buff_depth;
+ uint32_t gc_parameter_cache_depth;
+ uint32_t gc_double_offchip_lds_buffer;
+ uint32_t gc_wave_size;
+ uint32_t gc_max_waves_per_simd;
+ uint32_t gc_max_scratch_slots_per_cu;
+ uint32_t gc_lds_size;
+};
+
+struct gpu_info_firmware_v1_1 {
+ struct gpu_info_firmware_v1_0 v1_0;
+ uint32_t num_sc_per_sh;
+ uint32_t num_packer_per_sc;
+};
+
+/* gpu info payload
+ * version_major=1, version_minor=2 */
+struct gpu_info_firmware_v1_2 {
+ struct gpu_info_firmware_v1_1 v1_1;
+ struct gpu_info_soc_bounding_box_v1_0 soc_bounding_box;
+};
+
+/* version_major=1, version_minor=0 */
+struct gpu_info_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint16_t version_major; /* version */
+ uint16_t version_minor; /* version */
+};
+
+/* version_major=1, version_minor=0 */
+struct dmcu_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t intv_offset_bytes; /* interrupt vectors offset from end of header, in bytes */
+ uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */
+};
+
+/* version_major=1, version_minor=0 */
+struct dmcub_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t inst_const_bytes; /* size of instruction region, in bytes */
+ uint32_t bss_data_bytes; /* size of bss/data region, in bytes */
+};
+
+/* version_major=1, version_minor=0 */
+struct imu_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t imu_iram_ucode_size_bytes;
+ uint32_t imu_iram_ucode_offset_bytes;
+ uint32_t imu_dram_ucode_size_bytes;
+ uint32_t imu_dram_ucode_offset_bytes;
+};
+
+/* header is fixed size */
+union amdgpu_firmware_header {
+ struct common_firmware_header common;
+ struct mc_firmware_header_v1_0 mc;
+ struct smc_firmware_header_v1_0 smc;
+ struct smc_firmware_header_v2_0 smc_v2_0;
+ struct psp_firmware_header_v1_0 psp;
+ struct psp_firmware_header_v1_1 psp_v1_1;
+ struct psp_firmware_header_v1_3 psp_v1_3;
+ struct psp_firmware_header_v2_0 psp_v2_0;
+ struct psp_firmware_header_v2_0 psp_v2_1;
+ struct ta_firmware_header_v1_0 ta;
+ struct ta_firmware_header_v2_0 ta_v2_0;
+ struct gfx_firmware_header_v1_0 gfx;
+ struct gfx_firmware_header_v2_0 gfx_v2_0;
+ struct rlc_firmware_header_v1_0 rlc;
+ struct rlc_firmware_header_v2_0 rlc_v2_0;
+ struct rlc_firmware_header_v2_1 rlc_v2_1;
+ struct rlc_firmware_header_v2_2 rlc_v2_2;
+ struct rlc_firmware_header_v2_3 rlc_v2_3;
+ struct rlc_firmware_header_v2_4 rlc_v2_4;
+ struct sdma_firmware_header_v1_0 sdma;
+ struct sdma_firmware_header_v1_1 sdma_v1_1;
+ struct sdma_firmware_header_v2_0 sdma_v2_0;
+ struct sdma_firmware_header_v3_0 sdma_v3_0;
+ struct gpu_info_firmware_header_v1_0 gpu_info;
+ struct dmcu_firmware_header_v1_0 dmcu;
+ struct dmcub_firmware_header_v1_0 dmcub;
+ struct imu_firmware_header_v1_0 imu;
+ uint8_t raw[0x100];
+};
+
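+ /*
+ * Upper bound on the number of psp_fw_bin_desc entries that a PSP
+ * firmware header can carry, derived from the fixed 0x100-byte size of
+ * union amdgpu_firmware_header.
+ */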
+#define UCODE_MAX_PSP_PACKAGING (((sizeof(union amdgpu_firmware_header) - sizeof(struct common_firmware_header) - 4) / sizeof(struct psp_fw_bin_desc)) * 2)
+
+/*
+ * fw loading support
+ */
+enum AMDGPU_UCODE_ID {
+ AMDGPU_UCODE_ID_CAP = 0,
+ AMDGPU_UCODE_ID_SDMA0,
+ AMDGPU_UCODE_ID_SDMA1,
+ AMDGPU_UCODE_ID_SDMA2,
+ AMDGPU_UCODE_ID_SDMA3,
+ AMDGPU_UCODE_ID_SDMA4,
+ AMDGPU_UCODE_ID_SDMA5,
+ AMDGPU_UCODE_ID_SDMA6,
+ AMDGPU_UCODE_ID_SDMA7,
+ AMDGPU_UCODE_ID_SDMA_UCODE_TH0,
+ AMDGPU_UCODE_ID_SDMA_UCODE_TH1,
+ AMDGPU_UCODE_ID_SDMA_RS64,
+ AMDGPU_UCODE_ID_CP_CE,
+ AMDGPU_UCODE_ID_CP_PFP,
+ AMDGPU_UCODE_ID_CP_ME,
+ AMDGPU_UCODE_ID_CP_RS64_PFP,
+ AMDGPU_UCODE_ID_CP_RS64_ME,
+ AMDGPU_UCODE_ID_CP_RS64_MEC,
+ AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK,
+ AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK,
+ AMDGPU_UCODE_ID_CP_MEC1,
+ AMDGPU_UCODE_ID_CP_MEC1_JT,
+ AMDGPU_UCODE_ID_CP_MEC2,
+ AMDGPU_UCODE_ID_CP_MEC2_JT,
+ AMDGPU_UCODE_ID_CP_MES,
+ AMDGPU_UCODE_ID_CP_MES_DATA,
+ AMDGPU_UCODE_ID_CP_MES1,
+ AMDGPU_UCODE_ID_CP_MES1_DATA,
+ AMDGPU_UCODE_ID_IMU_I,
+ AMDGPU_UCODE_ID_IMU_D,
+ AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE0_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE1_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE2_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE3_TAP_DELAYS,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
+ AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
+ AMDGPU_UCODE_ID_RLC_IRAM,
+ AMDGPU_UCODE_ID_RLC_DRAM,
+ AMDGPU_UCODE_ID_RLC_P,
+ AMDGPU_UCODE_ID_RLC_V,
+ AMDGPU_UCODE_ID_RLC_G,
+ AMDGPU_UCODE_ID_STORAGE,
+ AMDGPU_UCODE_ID_SMC,
+ AMDGPU_UCODE_ID_PPTABLE,
+ AMDGPU_UCODE_ID_UVD,
+ AMDGPU_UCODE_ID_UVD1,
+ AMDGPU_UCODE_ID_VCE,
+ AMDGPU_UCODE_ID_VCN,
+ AMDGPU_UCODE_ID_VCN1,
+ AMDGPU_UCODE_ID_DMCU_ERAM,
+ AMDGPU_UCODE_ID_DMCU_INTV,
+ AMDGPU_UCODE_ID_VCN0_RAM,
+ AMDGPU_UCODE_ID_VCN1_RAM,
+ AMDGPU_UCODE_ID_DMCUB,
+ AMDGPU_UCODE_ID_VPE_CTX,
+ AMDGPU_UCODE_ID_VPE_CTL,
+ AMDGPU_UCODE_ID_VPE,
+ AMDGPU_UCODE_ID_UMSCH_MM_UCODE,
+ AMDGPU_UCODE_ID_UMSCH_MM_DATA,
+ AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+ AMDGPU_UCODE_ID_P2S_TABLE,
+ AMDGPU_UCODE_ID_JPEG_RAM,
+ AMDGPU_UCODE_ID_ISP,
+ AMDGPU_UCODE_ID_MAXIMUM,
+};
+
+/* engine firmware status */
+enum AMDGPU_UCODE_STATUS {
+ AMDGPU_UCODE_STATUS_INVALID,
+ AMDGPU_UCODE_STATUS_NOT_LOADED,
+ AMDGPU_UCODE_STATUS_LOADED,
+};
+
+enum amdgpu_firmware_load_type {
+ AMDGPU_FW_LOAD_DIRECT = 0,
+ AMDGPU_FW_LOAD_PSP,
+ AMDGPU_FW_LOAD_SMU,
+ AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
+};
+
+enum amdgpu_ucode_required {
+ AMDGPU_UCODE_OPTIONAL,
+ AMDGPU_UCODE_REQUIRED,
+};
+
+/* conform to smu_ucode_xfer_cz.h */
+#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
+#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
+#define AMDGPU_CPCE_UCODE_LOADED 0x00000004
+#define AMDGPU_CPPFP_UCODE_LOADED 0x00000008
+#define AMDGPU_CPME_UCODE_LOADED 0x00000010
+#define AMDGPU_CPMEC1_UCODE_LOADED 0x00000020
+#define AMDGPU_CPMEC2_UCODE_LOADED 0x00000040
+#define AMDGPU_CPRLC_UCODE_LOADED 0x00000100
+
+/* amdgpu firmware info */
+struct amdgpu_firmware_info {
+ /* ucode ID */
+ enum AMDGPU_UCODE_ID ucode_id;
+ /* request_firmware */
+ const struct firmware *fw;
+ /* starting mc address */
+ uint64_t mc_addr;
+ /* kernel linear address */
+ void *kaddr;
+ /* ucode_size_bytes */
+ uint32_t ucode_size;
+ /* starting tmr mc address */
+ uint32_t tmr_mc_addr_lo;
+ uint32_t tmr_mc_addr_hi;
+};
+
+struct amdgpu_firmware {
+ struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
+ enum amdgpu_firmware_load_type load_type;
+ struct amdgpu_bo *fw_buf;
+ unsigned int fw_size;
+ unsigned int max_ucodes;
+ /* from vega10 onwards, firmware is loaded by the psp instead of the smu */
+ const struct amdgpu_psp_funcs *funcs;
+ struct amdgpu_bo *rbuf;
+ struct mutex mutex;
+
+ /* gpu info firmware data pointer */
+ const struct firmware *gpu_info_fw;
+
+ void *fw_buf_ptr;
+ uint64_t fw_buf_mc;
+ uint32_t pldm_version;
+};
+
+struct kicker_device {
+ unsigned short device;
+ u8 revision;
+};
+
+void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
+__printf(4, 5)
+int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
+ enum amdgpu_ucode_required required, const char *fmt, ...);
+void amdgpu_ucode_release(const struct firmware **fw);
+bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
+ uint16_t hdr_major, uint16_t hdr_minor);
+
+int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
+
+enum amdgpu_firmware_load_type
+amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
+
+const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
+
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len);
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
new file mode 100644
index 000000000000..3f0b0e9af4f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -0,0 +1,627 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/sort.h>
+#include "amdgpu.h"
+#include "umc_v6_7.h"
+#include "amdgpu_ras_mgr.h"
+#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms
+
+#define MAX_UMC_HASH_STRING_SIZE 256
+
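+ /*
+ * Translate a raw UMC error address into retired-page records through the
+ * UMC-version-specific conversion routine; unsupported UMC versions
+ * return an error.
+ */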
+static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t err_addr,
+ uint32_t ch_inst, uint32_t umc_inst)
+{
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(6, 7, 0):
+ umc_v6_7_convert_error_address(adev,
+ err_data, err_addr, ch_inst, umc_inst);
+ break;
+ default:
+ dev_warn(adev->dev,
+ "UMC address to Physical address translation is not supported\n");
+ return AMDGPU_RAS_FAIL;
+ }
+
+ return AMDGPU_RAS_SUCCESS;
+}
+
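+ /*
+ * MCA notifier path of page retirement: convert the reported UMC address
+ * to physical pages and, when the bad page threshold is enabled, record
+ * and persist them as bad pages.
+ */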
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
+{
+ struct ras_err_data err_data;
+ int ret;
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ err_data.err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error record in MCA notifier!\n");
+ ret = AMDGPU_RAS_FAIL;
+ goto out_fini_err_data;
+ }
+
+ err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;
+
+ /*
+ * Translate UMC channel address to Physical address
+ */
+ ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr,
+ ch_inst, umc_inst);
+ if (ret)
+ goto out_free_err_addr;
+
+ if (amdgpu_bad_page_threshold != 0) {
+ amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+ err_data.err_addr_cnt, false);
+ amdgpu_ras_save_bad_pages(adev, NULL);
+ }
+
+out_free_err_addr:
+ kfree(err_data.err_addr);
+
+out_fini_err_data:
+ amdgpu_ras_error_data_fini(&err_data);
+
+ return ret;
+}
+
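+ /*
+ * Query UMC RAS error counts and addresses (through the SMU ECC info,
+ * direct register reads or the RAS EEPROM, depending on what the platform
+ * supports), then retire the affected pages and report them to the SMU.
+ */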
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control = &con->eeprom_control;
+ unsigned int error_query_mode;
+ int ret = 0;
+ unsigned long err_count;
+
+ amdgpu_ras_get_error_query_mode(adev, &error_query_mode);
+
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear the error status
+ * even if a NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
+
+ mutex_lock(&con->page_retirement_lock);
+ if (!amdgpu_ras_smu_eeprom_supported(adev)) {
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
+ if (ret == -EOPNOTSUPP &&
+ error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev,
+ ras_error_status);
+
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear the error status
+ * even if a NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len =
+ adev->umc.max_ras_err_cnt_per_query;
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev,
+ ras_error_status);
+ }
+ } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
+ (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev,
+ ras_error_status);
+
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+
+ /* still call query_ras_error_address to clear the error status
+ * even if a NOMEM error is encountered
+ */
+ if (!err_data->err_addr)
+ dev_warn(adev->dev,
+ "Failed to alloc memory for umc error address record!\n");
+ else
+ err_data->err_addr_len =
+ adev->umc.max_ras_err_cnt_per_query;
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ adev->umc.ras->ecc_info_query_ras_error_address(adev,
+ ras_error_status);
+ }
+ }
+ } else {
+ if (!amdgpu_ras_eeprom_update_record_num(control)) {
+ err_data->err_addr_cnt = err_data->de_count =
+ control->ras_num_recs - control->ras_num_recs_old;
+ amdgpu_ras_eeprom_read_idx(control, err_data->err_addr,
+ control->ras_num_recs_old, err_data->de_count);
+ }
+ }
+
+ /* only uncorrectable error needs gpu reset */
+ if (err_data->ue_count || err_data->de_count) {
+ err_count = err_data->ue_count + err_data->de_count;
+ if ((amdgpu_bad_page_threshold != 0) &&
+ err_data->err_addr_cnt) {
+ amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
+ err_data->err_addr_cnt, amdgpu_ras_smu_eeprom_supported(adev));
+ amdgpu_ras_save_bad_pages(adev, &err_count);
+
+ amdgpu_dpm_send_hbm_bad_pages_num(adev,
+ con->eeprom_control.ras_num_bad_pages);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
+ }
+ }
+
+ kfree(err_data->err_addr);
+ err_data->err_addr = NULL;
+
+ mutex_unlock(&con->page_retirement_lock);
+}
+
+static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry,
+ uint32_t reset)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_umc_handle_bad_pages(adev, ras_error_status);
+
+ if ((err_data->ue_count || err_data->de_count) &&
+ (reset || amdgpu_ras_is_rma(adev))) {
+ con->gpu_reset_flags |= reset;
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ return AMDGPU_RAS_SUCCESS;
+}
+
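+ /*
+ * Entry point for UMC poison consumption: on APUs only a GPU reset is
+ * issued, on bare-metal dGPUs the poison is routed to page retirement
+ * (directly, through the unified RAS manager, or via the page retirement
+ * thread), and on SR-IOV it is forwarded to the host.
+ */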
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ int ret = AMDGPU_RAS_SUCCESS;
+
+ if (adev->gmc.xgmi.connected_to_cpu ||
+ adev->gmc.is_app_apu) {
+ if (reset) {
+ /* the MCA poison handler is only responsible for the GPU reset;
+ * the MCA notifier takes care of page retirement.
+ */
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev);
+ }
+ return ret;
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
+ struct ras_err_data err_data;
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ };
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+
+ ret = amdgpu_ras_error_data_init(&err_data);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+ if (ret == AMDGPU_RAS_SUCCESS && obj) {
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ obj->err_data.de_count += err_data.de_count;
+ }
+
+ amdgpu_ras_error_data_fini(&err_data);
+ } else if (amdgpu_uniras_enabled(adev)) {
+ struct ras_ih_info ih_info = {0};
+
+ ih_info.block = block;
+ ih_info.pasid = pasid;
+ ih_info.reset = reset;
+ ih_info.pasid_fn = pasid_fn;
+ ih_info.data = data;
+ amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
+ } else {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret;
+
+ ret = amdgpu_ras_put_poison_req(adev,
+ block, pasid, pasid_fn, data, reset);
+ if (!ret) {
+ atomic_inc(&con->page_retirement_req_cnt);
+ atomic_inc(&con->poison_consumption_count);
+ wake_up(&con->page_retirement_wq);
+ }
+ }
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev, block);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV!\n");
+ }
+
+ return ret;
+}
+
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset)
+{
+ return amdgpu_umc_pasid_poison_handler(adev,
+ block, 0, NULL, NULL, reset);
+}
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry)
+{
+ return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry,
+ AMDGPU_RAS_GPU_RESET_MODE1_RESET);
+}
+
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_umc_ras *ras;
+
+ if (!adev->umc.ras)
+ return 0;
+
+ ras = adev->umc.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register umc ras block!\n");
+ return err;
+ }
+
+ strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->umc.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+ if (!ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
+
+ return 0;
+}
+
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ /* ras init of specific umc version */
+ if (adev->umc.ras &&
+ adev->umc.ras->err_cnt_init)
+ adev->umc.ras->err_cnt_init(adev);
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->umc.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ ih_data.head = *ras_if;
+
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ return 0;
+}
+
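+ /*
+ * Append one retired-page record in EEPROM table format to err_data; the
+ * retired page is stored as a page frame number.
+ */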
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+ uint64_t err_addr,
+ uint64_t retired_page,
+ uint32_t channel_index,
+ uint32_t umc_inst)
+{
+ struct eeprom_table_record *err_rec;
+
+ if (!err_data ||
+ !err_data->err_addr ||
+ (err_data->err_addr_cnt >= err_data->err_addr_len))
+ return -EINVAL;
+
+ err_rec = &err_data->err_addr[err_data->err_addr_cnt];
+
+ err_rec->address = err_addr;
+ /* page frame address is saved */
+ err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ err_rec->ts = (uint64_t)ktime_get_real_seconds();
+ err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+ err_rec->cu = 0;
+ err_rec->mem_channel = channel_index;
+ err_rec->mcumc_id = umc_inst;
+
+ err_data->err_addr_cnt++;
+
+ return 0;
+}
+
+static int amdgpu_umc_loop_all_aid(struct amdgpu_device *adev, umc_func func,
+ void *data)
+{
+ uint32_t umc_node_inst;
+ uint32_t node_inst;
+ uint32_t umc_inst;
+ uint32_t ch_inst;
+ int ret;
+
+ /*
+ * This loop is based on the following:
+ * umc.active_mask = mask of active umc instances across all nodes
+ * umc.umc_inst_num = maximum number of umc instances per node
+ * umc.node_inst_num = maximum number of node instances
+ * Channel instances are not assumed to be harvested.
+ */
+ dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d",
+ adev->umc.active_mask, adev->umc.umc_inst_num);
+ for_each_set_bit(umc_node_inst, &(adev->umc.active_mask),
+ adev->umc.node_inst_num * adev->umc.umc_inst_num) {
+ node_inst = umc_node_inst / adev->umc.umc_inst_num;
+ umc_inst = umc_node_inst % adev->umc.umc_inst_num;
+ LOOP_UMC_CH_INST(ch_inst) {
+ dev_dbg(adev->dev,
+ "node_inst :%d umc_inst: %d ch_inst: %d",
+ node_inst, umc_inst, ch_inst);
+ ret = func(adev, node_inst, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev,
+ "Node %d umc %d ch %d func returns %d\n",
+ node_inst, umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
+ umc_func func, void *data)
+{
+ uint32_t node_inst = 0;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ int ret = 0;
+
+ if (adev->aid_mask)
+ return amdgpu_umc_loop_all_aid(adev, func, data);
+
+ if (adev->umc.node_inst_num) {
+ LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+ ret = func(adev, node_inst, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n",
+ node_inst, umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ } else {
+ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+ ret = func(adev, 0, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev, "Umc %d ch %d func returns %d\n",
+ umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
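+
+/*
+ * Caller sketch for amdgpu_umc_loop_channels() (dump_channel() is an
+ * illustrative callback, not part of this file):
+ *
+ *	static int dump_channel(struct amdgpu_device *adev, uint32_t node_inst,
+ *				uint32_t umc_inst, uint32_t ch_inst, void *data)
+ *	{
+ *		dev_dbg(adev->dev, "node %u umc %u ch %u\n",
+ *			node_inst, umc_inst, ch_inst);
+ *		return 0;
+ *	}
+ *
+ *	amdgpu_umc_loop_channels(adev, dump_channel, NULL);
+ *
+ * A non-zero return from the callback aborts the iteration and is
+ * propagated back to the caller.
+ */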
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr)
+{
+ if (adev->umc.ras->update_ecc_status)
+ return adev->umc.ras->update_ecc_status(adev,
+ status, ipid, addr);
+ return 0;
+}
+
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_ecc_log_info *ecc_log;
+ int ret;
+
+ ecc_log = &con->umc_ecc_log;
+
+ mutex_lock(&ecc_log->lock);
+ ret = radix_tree_insert(ecc_tree, ecc_err->pa_pfn, ecc_err);
+ if (!ret)
+ radix_tree_tag_set(ecc_tree,
+ ecc_err->pa_pfn, UMC_ECC_NEW_DETECTED_TAG);
+ mutex_unlock(&ecc_log->lock);
+
+ return ret;
+}
+
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr)
+{
+ struct ta_ras_query_address_output addr_out;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ addr_out.pa.pa = pa_addr;
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL,
+ &addr_out, false);
+ else
+ return -EINVAL;
+}
+
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len)
+{
+ int i, ret;
+ struct ras_err_data err_data;
+
+ err_data.err_addr = kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n");
+ return 0;
+ }
+
+ ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ if (i >= len)
+ goto out;
+
+ pfns[i] = err_data.err_addr[i].retired_page;
+ }
+ ret = i;
+ adev->umc.err_addr_cnt = err_data.err_addr_cnt;
+
+out:
+ kfree(err_data.err_addr);
+ return ret;
+}
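+
+/*
+ * Caller sketch (the array size and reserve_page() are illustrative): look
+ * up every page retired together with a given physical address, bounded by
+ * the per-UE retire unit.
+ *
+ *	uint64_t pfns[8];
+ *	int i, n;
+ *
+ *	n = amdgpu_umc_lookup_bad_pages_in_a_row(adev, pa, pfns,
+ *						 ARRAY_SIZE(pfns));
+ *	for (i = 0; i < n; i++)
+ *		reserve_page(adev, pfns[i]);
+ *
+ * The return value is the number of PFNs written; errors from the address
+ * conversion are passed through.
+ */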
+
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr)
+{
+ struct ta_ras_query_address_input addr_in;
+ int ret;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = ch;
+ addr_in.ma.umc_inst = umc;
+ addr_in.ma.node_inst = node;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+ addr_out, dump_addr);
+ if (ret)
+ return ret;
+ } else {
+ return 0;
+ }
+
+ return 0;
+}
+
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps)
+{
+ struct ta_ras_query_address_input addr_in;
+ struct ta_ras_query_address_output addr_out;
+ int ret;
+
+ /* nps: the memory partition mode the pa belongs to */
+ addr_in.pa.pa = pa | ((uint64_t)nps << 58);
+ addr_in.addr_type = TA_RAS_PA_TO_MCA;
+ ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out);
+ if (ret) {
+ dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx",
+ pa);
+
+ return ret;
+ }
+
+ *mca = addr_out.ma.err_addr;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
new file mode 100644
index 000000000000..28dff750c47e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_UMC_H__
+#define __AMDGPU_UMC_H__
+#include "amdgpu_ras.h"
+#include "amdgpu_mca.h"
+/*
+ * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
+ * is the index of 4KB block
+ */
+#define ADDR_OF_4KB_BLOCK(addr) (((addr) & ~0xffULL) << 4)
+/*
+ * (addr / 256) * 8192, the higher 26 bits in ErrorAddr
+ * is the index of 8KB block
+ */
+#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+/*
+ * (addr / 256) * 32768, the higher 26 bits in ErrorAddr
+ * is the index of 32KB block
+ */
+#define ADDR_OF_32KB_BLOCK(addr) (((addr) & ~0xffULL) << 7)
+/* channel index is the index of 256B block */
+#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
+/* offset in 256B block */
+#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL)
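+
+/*
+ * Illustrative composition sketch (the exact block-size macro depends on
+ * the UMC generation): a retired physical address is typically rebuilt
+ * from a normalized error address and its channel index as
+ *
+ *	retired_page = ADDR_OF_8KB_BLOCK(err_addr) |
+ *		       ADDR_OF_256B_BLOCK(channel_index) |
+ *		       OFFSET_IN_256B_BLOCK(err_addr);
+ */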
+
+#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
+#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
+#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
+
+#define LOOP_UMC_NODE_INST(node_inst) \
+ for_each_set_bit((node_inst), &(adev->umc.active_mask), adev->umc.node_inst_num)
+
+#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
+ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
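+
+/*
+ * Minimal usage sketch (query_error_count() is a hypothetical helper, not
+ * part of this header): the loop macros expect an 'adev' pointer in scope
+ * and plain uint32_t iterators, e.g.
+ *
+ *	uint32_t umc_inst, ch_inst;
+ *
+ *	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst)
+ *		query_error_count(adev, umc_inst, ch_inst);
+ *
+ * For generic, per-ASIC aware iteration see amdgpu_umc_loop_channels().
+ */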
+
+/* Page retirement tag */
+#define UMC_ECC_NEW_DETECTED_TAG 0x1
+/*
+ * a flag to indicate v2 of channel index stored in eeprom
+ *
+ * v1 (legacy way): store channel index within a umc instance in eeprom
+ * range in UMC v12: 0 ~ 7
+ * v2: store global channel index in eeprom
+ * range in UMC v12: 0 ~ 127
+ *
+ * NOTE: ideally this would be stored in eeprom_table_record.mem_channel,
+ * but mem_channel only has 8 bits and the channel number may grow in the
+ * future, so the flag is saved in eeprom_table_record.retired_page instead.
+ * retired_page is unused in v2; eeprom_table_record.address is relied on
+ * in its place. Only 48 bits are saved on eeprom, so bit 47 is used here.
+ */
+#define UMC_CHANNEL_IDX_V2 BIT_ULL(47)
+
+/*
+ * save the nps value in eeprom_table_record.retired_page[47:40];
+ * the channel index flag above will be retired.
+ */
+#define UMC_NPS_SHIFT 40
+#define UMC_NPS_MASK 0xffULL
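+
+/*
+ * Encoding sketch (assumed usage, not tied to a specific UMC version):
+ *
+ *	retired_page |= ((uint64_t)nps & UMC_NPS_MASK) << UMC_NPS_SHIFT;
+ *	nps = (retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ */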
+
+/* three column bits and one row bit of the MCA address are flipped
+ * during bad page retirement
+ */
+#define RETIRE_FLIP_BITS_NUM 4
+
+struct amdgpu_umc_flip_bits {
+ uint32_t flip_bits_in_pa[RETIRE_FLIP_BITS_NUM];
+ uint32_t flip_row_bit;
+ uint32_t r13_in_pa;
+ uint32_t bit_num;
+};
+
+typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
+ uint32_t umc_inst, uint32_t ch_inst, void *data);
+
+struct amdgpu_umc_ras {
+ struct amdgpu_ras_block_object ras_block;
+ void (*err_cnt_init)(struct amdgpu_device *adev);
+ bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
+ void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ bool (*check_ecc_err_status)(struct amdgpu_device *adev,
+ enum amdgpu_mca_error_type type, void *ras_error_status);
+ int (*update_ecc_status)(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+ int (*convert_ras_err_addr)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr);
+ uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page);
+ void (*get_retire_flip_bits)(struct amdgpu_device *adev);
+ void (*mca_ipid_parse)(struct amdgpu_device *adev, uint64_t ipid,
+ uint32_t *did, uint32_t *ch, uint32_t *umc_inst, uint32_t *sid);
+};
+
+struct amdgpu_umc_funcs {
+ void (*init_registers)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_umc {
+ /* max error count in one ras query call */
+ uint32_t max_ras_err_cnt_per_query;
+ /* number of umc channel instances with memory-mapped register access */
+ uint32_t channel_inst_num;
+ /* number of umc instances with memory-mapped register access */
+ uint32_t umc_inst_num;
+
+ /* Total number of umc node instances, including harvested ones */
+ uint32_t node_inst_num;
+
+ /* UMC register per channel offset */
+ uint32_t channel_offs;
+ /* how many pages are retired in one UE */
+ uint32_t retire_unit;
+ /* channel index table of interleaved memory */
+ const uint32_t *channel_idx_tbl;
+ struct ras_common_if *ras_if;
+
+ const struct amdgpu_umc_funcs *funcs;
+ struct amdgpu_umc_ras *ras;
+
+ /* active mask for umc node instance */
+ unsigned long active_mask;
+
+ struct amdgpu_umc_flip_bits flip_bits;
+
+ unsigned long err_addr_cnt;
+};
+
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset);
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+ uint64_t err_addr,
+ uint64_t retired_page,
+ uint32_t channel_index,
+ uint32_t umc_inst);
+
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+ void *ras_error_status,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
+
+int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
+ umc_func func, void *data);
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err);
+
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status);
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr);
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len);
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr);
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
new file mode 100644
index 000000000000..5b27fc41ffbf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/ioctl.h>
+
+/*
+ * MMIO debugfs IOCTL structure
+ */
+struct amdgpu_debugfs_regs2_iocdata {
+ __u32 use_srbm, use_grbm, pg_lock;
+ struct {
+ __u32 se, sh, instance;
+ } grbm;
+ struct {
+ __u32 me, pipe, queue, vmid;
+ } srbm;
+};
+
+struct amdgpu_debugfs_regs2_iocdata_v2 {
+ __u32 use_srbm, use_grbm, pg_lock;
+ struct {
+ __u32 se, sh, instance;
+ } grbm;
+ struct {
+ __u32 me, pipe, queue, vmid;
+ } srbm;
+ u32 xcc_id;
+};
+
+struct amdgpu_debugfs_gprwave_iocdata {
+ u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id;
+ struct {
+ u32 thread, vpgr_or_sgpr;
+ } gpr;
+};
+
+/*
+ * MMIO debugfs state data (per file* handle)
+ */
+struct amdgpu_debugfs_regs2_data {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ struct amdgpu_debugfs_regs2_iocdata_v2 id;
+};
+
+struct amdgpu_debugfs_gprwave_data {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ struct amdgpu_debugfs_gprwave_iocdata id;
+};
+
+enum AMDGPU_DEBUGFS_REGS2_CMDS {
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE = 0,
+ AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2,
+};
+
+enum AMDGPU_DEBUGFS_GPRWAVE_CMDS {
+ AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE = 0,
+};
+
+//reg2 interface
+#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata)
+#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2)
+
+//gprwave interface
+#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata)
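+
+/*
+ * Illustrative userspace flow (a sketch of expected usage, e.g. by the umr
+ * debugger): open the amdgpu_regs2 debugfs file, issue
+ * AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE (or the _V2 variant to also select an
+ * XCC) with the desired GRBM/SRBM bank selection, then pread()/pwrite() at
+ * the register byte offset to access MMIO under that selection.
+ */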
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
new file mode 100644
index 000000000000..cd707d70a0bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_umsch_mm.h"
+#include "umsch_mm_v4_0.h"
+
+MODULE_FIRMWARE("amdgpu/umsch_mm_4_0_0.bin");
+
+int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws)
+{
+ struct amdgpu_ring *ring = &umsch->ring;
+
+ if (amdgpu_ring_alloc(ring, ndws))
+ return -ENOMEM;
+
+ amdgpu_ring_write_multiple(ring, pkt, ndws);
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+
+int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_ring *ring = &umsch->ring;
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, adev->usec_timeout);
+ if (r < 1) {
+ dev_err(adev->dev, "ring umsch timeout, emitted fence %u\n",
+ ring->fence_drv.sync_seq);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static void umsch_mm_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
+ else
+ WREG32(umsch->rb_wptr, ring->wptr << 2);
+}
+
+static u64 umsch_mm_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(umsch->rb_rptr);
+}
+
+static u64 umsch_mm_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring;
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32(umsch->rb_wptr);
+}
+
+static const struct amdgpu_ring_funcs umsch_v4_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_UMSCH_MM,
+ .align_mask = 0,
+ .nop = 0,
+ .support_64bit_ptrs = false,
+ .get_rptr = umsch_mm_ring_get_rptr,
+ .get_wptr = umsch_mm_ring_get_wptr,
+ .set_wptr = umsch_mm_ring_set_wptr,
+ .insert_nop = amdgpu_ring_insert_nop,
+};
+
+int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm);
+ struct amdgpu_ring *ring = &umsch->ring;
+
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ ring->use_doorbell = true;
+ ring->no_scheduler = true;
+ ring->doorbell_index = (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1) + 6;
+
+ snprintf(ring->name, sizeof(ring->name), "umsch");
+
+ return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
+{
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
+ struct amdgpu_device *adev = umsch->ring.adev;
+ const char *fw_name = NULL;
+ int r;
+
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ fw_name = "4_0_0";
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/umsch_mm_%s.bin", fw_name);
+ if (r) {
+ release_firmware(adev->umsch_mm.fw);
+ adev->umsch_mm.fw = NULL;
+ return r;
+ }
+
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)adev->umsch_mm.fw->data;
+
+ adev->umsch_mm.ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+ adev->umsch_mm.data_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+
+ adev->umsch_mm.irq_start_addr =
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_hi)) << 32);
+ adev->umsch_mm.uc_start_addr =
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_hi)) << 32);
+ adev->umsch_mm.data_start_addr =
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_hi)) << 32);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ struct amdgpu_firmware_info *info;
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_UCODE];
+ info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_UCODE;
+ info->fw = adev->umsch_mm.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_DATA];
+ info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_DATA;
+ info->fw = adev->umsch_mm.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes), PAGE_SIZE);
+ }
+
+ return 0;
+}
+
+int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch)
+{
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
+ struct amdgpu_device *adev = umsch->ring.adev;
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ int r;
+
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)
+ adev->umsch_mm.fw->data;
+
+ fw_data = (const __le32 *)(adev->umsch_mm.fw->data +
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_offset_bytes));
+ fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 4 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->umsch_mm.ucode_fw_obj,
+ &adev->umsch_mm.ucode_fw_gpu_addr,
+ (void **)&adev->umsch_mm.ucode_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create umsch_mm fw ucode bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->umsch_mm.ucode_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->umsch_mm.ucode_fw_obj);
+ amdgpu_bo_unreserve(adev->umsch_mm.ucode_fw_obj);
+ return 0;
+}
+
+int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch)
+{
+ const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr;
+ struct amdgpu_device *adev = umsch->ring.adev;
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ int r;
+
+ umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)
+ adev->umsch_mm.fw->data;
+
+ fw_data = (const __le32 *)(adev->umsch_mm.fw->data +
+ le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->umsch_mm.data_fw_obj,
+ &adev->umsch_mm.data_fw_gpu_addr,
+ (void **)&adev->umsch_mm.data_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create umsch_mm fw data bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->umsch_mm.data_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->umsch_mm.data_fw_obj);
+ amdgpu_bo_unreserve(adev->umsch_mm.data_fw_obj);
+ return 0;
+}
+
+int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch)
+{
+ struct amdgpu_device *adev = umsch->ring.adev;
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER,
+ .mc_addr = adev->umsch_mm.cmd_buf_gpu_addr,
+ .ucode_size = ((uintptr_t)adev->umsch_mm.cmd_buf_curr_ptr -
+ (uintptr_t)adev->umsch_mm.cmd_buf_ptr),
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+static void umsch_mm_agdb_index_init(struct amdgpu_device *adev)
+{
+ uint32_t umsch_mm_agdb_start;
+ int i;
+
+ umsch_mm_agdb_start = adev->doorbell_index.max_assignment + 1;
+ umsch_mm_agdb_start = roundup(umsch_mm_agdb_start, 1024);
+ umsch_mm_agdb_start += (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1);
+
+ for (i = 0; i < CONTEXT_PRIORITY_NUM_LEVELS; i++)
+ adev->umsch_mm.agdb_index[i] = umsch_mm_agdb_start + i;
+}
+
+static int umsch_mm_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ adev->umsch_mm.vmid_mask_mm_vpe = 0xf00;
+ adev->umsch_mm.engine_mask = (1 << UMSCH_SWIP_ENGINE_TYPE_VPE);
+ adev->umsch_mm.vpe_hqd_mask = 0xfe;
+
+ r = amdgpu_device_wb_get(adev, &adev->umsch_mm.wb_index);
+ if (r) {
+ dev_err(adev->dev, "failed to alloc wb for umsch: %d\n", r);
+ return r;
+ }
+
+ adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr +
+ (adev->umsch_mm.wb_index * 4);
+
+ r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->umsch_mm.cmd_buf_obj,
+ &adev->umsch_mm.cmd_buf_gpu_addr,
+ (void **)&adev->umsch_mm.cmd_buf_ptr);
+ if (r) {
+ dev_err(adev->dev, "failed to allocate cmdbuf bo %d\n", r);
+ amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
+ return r;
+ }
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_UMSCHFW_LOG_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->umsch_mm.dbglog_bo,
+ &adev->umsch_mm.log_gpu_addr,
+ &adev->umsch_mm.log_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate umsch debug bo\n", r);
+ return r;
+ }
+
+ mutex_init(&adev->umsch_mm.mutex_hidden);
+
+ umsch_mm_agdb_index_init(adev);
+
+ return 0;
+}
+
+
+static int umsch_mm_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
+ case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
+ umsch_mm_v4_0_set_funcs(&adev->umsch_mm);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ adev->umsch_mm.ring.funcs = &umsch_v4_0_ring_funcs;
+ umsch_mm_set_regs(&adev->umsch_mm);
+
+ return 0;
+}
+
+static int umsch_mm_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend)
+ return 0;
+
+ return 0;
+}
+
+static int umsch_mm_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = umsch_mm_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_umsch_fwlog_init(&adev->umsch_mm);
+ r = umsch_mm_ring_init(&adev->umsch_mm);
+ if (r)
+ return r;
+
+ r = umsch_mm_init_microcode(&adev->umsch_mm);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int umsch_mm_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ release_firmware(adev->umsch_mm.fw);
+ adev->umsch_mm.fw = NULL;
+
+ amdgpu_ring_fini(&adev->umsch_mm.ring);
+
+ mutex_destroy(&adev->umsch_mm.mutex_hidden);
+
+ amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj,
+ &adev->umsch_mm.cmd_buf_gpu_addr,
+ (void **)&adev->umsch_mm.cmd_buf_ptr);
+
+ amdgpu_bo_free_kernel(&adev->umsch_mm.dbglog_bo,
+ &adev->umsch_mm.log_gpu_addr,
+ (void **)&adev->umsch_mm.log_cpu_addr);
+
+ amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
+
+ return 0;
+}
+
+static int umsch_mm_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = umsch_mm_load_microcode(&adev->umsch_mm);
+ if (r)
+ return r;
+
+ umsch_mm_ring_start(&adev->umsch_mm);
+
+ r = umsch_mm_set_hw_resources(&adev->umsch_mm);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int umsch_mm_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ umsch_mm_ring_stop(&adev->umsch_mm);
+
+ amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj,
+ &adev->umsch_mm.data_fw_gpu_addr,
+ (void **)&adev->umsch_mm.data_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj,
+ &adev->umsch_mm.ucode_fw_gpu_addr,
+ (void **)&adev->umsch_mm.ucode_fw_ptr);
+ return 0;
+}
+
+static int umsch_mm_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return umsch_mm_hw_fini(ip_block);
+}
+
+static int umsch_mm_resume(struct amdgpu_ip_block *ip_block)
+{
+ return umsch_mm_hw_init(ip_block);
+}
+
+void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm)
+{
+#if defined(CONFIG_DEBUG_FS)
+ void *fw_log_cpu_addr = umsch_mm->log_cpu_addr;
+ volatile struct amdgpu_umsch_fwlog *log_buf = fw_log_cpu_addr;
+
+ log_buf->header_size = sizeof(struct amdgpu_umsch_fwlog);
+ log_buf->buffer_size = AMDGPU_UMSCHFW_LOG_SIZE;
+ log_buf->rptr = log_buf->header_size;
+ log_buf->wptr = log_buf->header_size;
+ log_buf->wrapped = 0;
+#endif
+}
+
+/*
+ * debugfs for reading the umsch firmware log buffer.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static ssize_t amdgpu_debugfs_umsch_fwlog_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_umsch_mm *umsch_mm;
+ void *log_buf;
+ volatile struct amdgpu_umsch_fwlog *plog;
+ unsigned int read_pos, write_pos, available, i, read_bytes = 0;
+ unsigned int read_num[2] = {0};
+
+ umsch_mm = file_inode(f)->i_private;
+ if (!umsch_mm)
+ return -ENODEV;
+
+ if (!umsch_mm->log_cpu_addr)
+ return -EFAULT;
+
+ log_buf = umsch_mm->log_cpu_addr;
+
+ plog = (volatile struct amdgpu_umsch_fwlog *)log_buf;
+ read_pos = plog->rptr;
+ write_pos = plog->wptr;
+
+ if (read_pos > AMDGPU_UMSCHFW_LOG_SIZE || write_pos > AMDGPU_UMSCHFW_LOG_SIZE)
+ return -EFAULT;
+
+ if (!size || (read_pos == write_pos))
+ return 0;
+
+ if (write_pos > read_pos) {
+ available = write_pos - read_pos;
+ read_num[0] = min_t(size_t, size, available);
+ } else {
+ read_num[0] = AMDGPU_UMSCHFW_LOG_SIZE - read_pos;
+ available = read_num[0] + write_pos - plog->header_size;
+ if (size > available)
+ read_num[1] = write_pos - plog->header_size;
+ else if (size > read_num[0])
+ read_num[1] = size - read_num[0];
+ else
+ read_num[0] = size;
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (read_num[i]) {
+ if (read_pos == AMDGPU_UMSCHFW_LOG_SIZE)
+ read_pos = plog->header_size;
+ if (read_num[i] == copy_to_user((buf + read_bytes),
+ (log_buf + read_pos), read_num[i]))
+ return -EFAULT;
+
+ read_bytes += read_num[i];
+ read_pos += read_num[i];
+ }
+ }
+
+ plog->rptr = read_pos;
+ *pos += read_bytes;
+ return read_bytes;
+}
+
+static const struct file_operations amdgpu_debugfs_umschfwlog_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_umsch_fwlog_read,
+ .llseek = default_llseek
+};
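+
+/*
+ * Reading the log from userspace is a plain read of the debugfs file
+ * created below, e.g. (path is illustrative, <N> is the DRM minor):
+ *
+ *	cat /sys/kernel/debug/dri/<N>/amdgpu_umsch_fwlog
+ *
+ * The read handler above consumes the bytes between rptr and wptr,
+ * handling at most one wrap of the ring buffer per call, and advances
+ * rptr accordingly.
+ */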
+#endif
+
+void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
+ struct amdgpu_umsch_mm *umsch_mm)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ sprintf(name, "amdgpu_umsch_fwlog");
+ debugfs_create_file_size(name, S_IFREG | 0444, root, umsch_mm,
+ &amdgpu_debugfs_umschfwlog_fops,
+ AMDGPU_UMSCHFW_LOG_SIZE);
+#endif
+}
+
+static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
+ .name = "umsch_mm_v4_0",
+ .early_init = umsch_mm_early_init,
+ .late_init = umsch_mm_late_init,
+ .sw_init = umsch_mm_sw_init,
+ .sw_fini = umsch_mm_sw_fini,
+ .hw_init = umsch_mm_hw_init,
+ .hw_fini = umsch_mm_hw_fini,
+ .suspend = umsch_mm_suspend,
+ .resume = umsch_mm_resume,
+};
+
+const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_UMSCH_MM,
+ .major = 4,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &umsch_mm_v4_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
new file mode 100644
index 000000000000..2c771a753778
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_UMSCH_MM_H__
+#define __AMDGPU_UMSCH_MM_H__
+
+enum UMSCH_SWIP_ENGINE_TYPE {
+ UMSCH_SWIP_ENGINE_TYPE_VCN0 = 0,
+ UMSCH_SWIP_ENGINE_TYPE_VCN1 = 1,
+ UMSCH_SWIP_ENGINE_TYPE_VCN = 2,
+ UMSCH_SWIP_ENGINE_TYPE_VPE = 3,
+ UMSCH_SWIP_ENGINE_TYPE_MAX
+};
+
+enum UMSCH_CONTEXT_PRIORITY_LEVEL {
+ CONTEXT_PRIORITY_LEVEL_IDLE = 0,
+ CONTEXT_PRIORITY_LEVEL_NORMAL = 1,
+ CONTEXT_PRIORITY_LEVEL_FOCUS = 2,
+ CONTEXT_PRIORITY_LEVEL_REALTIME = 3,
+ CONTEXT_PRIORITY_NUM_LEVELS
+};
+
+struct umsch_mm_set_resource_input {
+ uint32_t vmid_mask_mm_vcn;
+ uint32_t vmid_mask_mm_vpe;
+ uint32_t collaboration_mask_vpe;
+ uint32_t logging_vmid;
+ uint32_t engine_mask;
+ union {
+ struct {
+ uint32_t disable_reset : 1;
+ uint32_t disable_umsch_mm_log : 1;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 29;
+ };
+ uint32_t uint32_all;
+ };
+};
+
+struct amdgpu_umsch_fwlog {
+ uint32_t rptr;
+ uint32_t wptr;
+ uint32_t buffer_size;
+ uint32_t header_size;
+ uint32_t wrapped;
+};
+
+struct umsch_mm_add_queue_input {
+ uint32_t process_id;
+ uint64_t page_table_base_addr;
+ uint64_t process_va_start;
+ uint64_t process_va_end;
+ uint64_t process_quantum;
+ uint64_t process_csa_addr;
+ uint64_t context_quantum;
+ uint64_t context_csa_addr;
+ uint32_t inprocess_context_priority;
+ enum UMSCH_CONTEXT_PRIORITY_LEVEL context_global_priority_level;
+ uint32_t doorbell_offset_0;
+ uint32_t doorbell_offset_1;
+ enum UMSCH_SWIP_ENGINE_TYPE engine_type;
+ uint32_t affinity;
+ uint64_t mqd_addr;
+ uint64_t h_context;
+ uint64_t h_queue;
+ uint32_t vm_context_cntl;
+
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
+
+ struct {
+ uint32_t is_context_suspended : 1;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
+ };
+};
+
+struct umsch_mm_remove_queue_input {
+ uint32_t doorbell_offset_0;
+ uint32_t doorbell_offset_1;
+ uint64_t context_csa_addr;
+ uint32_t context_csa_array_index;
+};
+
+struct MQD_INFO {
+ uint32_t rb_base_hi;
+ uint32_t rb_base_lo;
+ uint32_t rb_size;
+ uint32_t wptr_val;
+ uint32_t rptr_val;
+ uint32_t unmapped;
+ uint32_t vmid;
+};
+
+struct amdgpu_umsch_mm;
+
+struct umsch_mm_funcs {
+ int (*set_hw_resources)(struct amdgpu_umsch_mm *umsch);
+ int (*add_queue)(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_add_queue_input *input);
+ int (*remove_queue)(struct amdgpu_umsch_mm *umsch,
+ struct umsch_mm_remove_queue_input *input);
+ int (*set_regs)(struct amdgpu_umsch_mm *umsch);
+ int (*init_microcode)(struct amdgpu_umsch_mm *umsch);
+ int (*load_microcode)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_init)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_start)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_stop)(struct amdgpu_umsch_mm *umsch);
+ int (*ring_fini)(struct amdgpu_umsch_mm *umsch);
+};
+
+struct amdgpu_umsch_mm {
+ struct amdgpu_ring ring;
+
+ uint32_t rb_wptr;
+ uint32_t rb_rptr;
+
+ const struct umsch_mm_funcs *funcs;
+
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_bo *ucode_fw_obj;
+ uint64_t ucode_fw_gpu_addr;
+ uint32_t *ucode_fw_ptr;
+ uint64_t irq_start_addr;
+ uint64_t uc_start_addr;
+ uint32_t ucode_size;
+
+ struct amdgpu_bo *data_fw_obj;
+ uint64_t data_fw_gpu_addr;
+ uint32_t *data_fw_ptr;
+ uint64_t data_start_addr;
+ uint32_t data_size;
+
+ struct amdgpu_bo *cmd_buf_obj;
+ uint64_t cmd_buf_gpu_addr;
+ uint32_t *cmd_buf_ptr;
+ uint32_t *cmd_buf_curr_ptr;
+
+ uint32_t wb_index;
+ uint64_t sch_ctx_gpu_addr;
+ uint32_t *sch_ctx_cpu_addr;
+
+ uint32_t vmid_mask_mm_vcn;
+ uint32_t vmid_mask_mm_vpe;
+ uint32_t engine_mask;
+ uint32_t vcn0_hqd_mask;
+ uint32_t vcn1_hqd_mask;
+ uint32_t vcn_hqd_mask[2];
+ uint32_t vpe_hqd_mask;
+ uint32_t agdb_index[CONTEXT_PRIORITY_NUM_LEVELS];
+
+ struct mutex mutex_hidden;
+ struct amdgpu_bo *dbglog_bo;
+ void *log_cpu_addr;
+ uint64_t log_gpu_addr;
+ uint32_t mem_size;
+ uint32_t log_offset;
+};
+
+int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws);
+int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch);
+int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch);
+int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch);
+
+int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch);
+
+void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
+ struct amdgpu_umsch_mm *umsch);
+
+void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm);
+
+#define WREG32_SOC15_UMSCH(reg, value) \
+ do { \
+ uint32_t reg_offset = adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + reg; \
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { \
+ *adev->umsch_mm.cmd_buf_curr_ptr++ = (reg_offset << 2); \
+ *adev->umsch_mm.cmd_buf_curr_ptr++ = value; \
+ } else { \
+ WREG32(reg_offset, value); \
+ } \
+ } while (0)
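+
+/*
+ * Usage sketch (the register symbol is illustrative): callers pass a
+ * SOC15-style VCN_HWIP register, e.g.
+ *
+ *	WREG32_SOC15_UMSCH(regVCN_UMSCH_RB_WPTR, ring->wptr << 2);
+ *
+ * With PSP firmware loading the write is only recorded into the command
+ * buffer and later replayed by amdgpu_umsch_mm_psp_execute_cmd_buf();
+ * otherwise it is issued to MMIO directly.
+ */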
+
+#define umsch_mm_set_hw_resources(umsch) \
+ ((umsch)->funcs->set_hw_resources ? (umsch)->funcs->set_hw_resources((umsch)) : 0)
+#define umsch_mm_add_queue(umsch, input) \
+ ((umsch)->funcs->add_queue ? (umsch)->funcs->add_queue((umsch), (input)) : 0)
+#define umsch_mm_remove_queue(umsch, input) \
+ ((umsch)->funcs->remove_queue ? (umsch)->funcs->remove_queue((umsch), (input)) : 0)
+
+#define umsch_mm_set_regs(umsch) \
+ ((umsch)->funcs->set_regs ? (umsch)->funcs->set_regs((umsch)) : 0)
+#define umsch_mm_init_microcode(umsch) \
+ ((umsch)->funcs->init_microcode ? (umsch)->funcs->init_microcode((umsch)) : 0)
+#define umsch_mm_load_microcode(umsch) \
+ ((umsch)->funcs->load_microcode ? (umsch)->funcs->load_microcode((umsch)) : 0)
+
+#define umsch_mm_ring_init(umsch) \
+ ((umsch)->funcs->ring_init ? (umsch)->funcs->ring_init((umsch)) : 0)
+#define umsch_mm_ring_start(umsch) \
+ ((umsch)->funcs->ring_start ? (umsch)->funcs->ring_start((umsch)) : 0)
+#define umsch_mm_ring_stop(umsch) \
+ ((umsch)->funcs->ring_stop ? (umsch)->funcs->ring_stop((umsch)) : 0)
+#define umsch_mm_ring_fini(umsch) \
+ ((umsch)->funcs->ring_fini ? (umsch)->funcs->ring_fini((umsch)) : 0)
+
+static inline void amdgpu_umsch_mm_lock(struct amdgpu_umsch_mm *umsch)
+{
+ mutex_lock(&umsch->mutex_hidden);
+}
+
+static inline void amdgpu_umsch_mm_unlock(struct amdgpu_umsch_mm *umsch)
+{
+ mutex_unlock(&umsch->mutex_hidden);
+}
+
+extern const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
new file mode 100644
index 000000000000..9a969175900e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -0,0 +1,1482 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_auth.h>
+#include <drm/drm_exec.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu_userq_fence.h"
+
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
+{
+ int i;
+ u32 userq_ip_mask = 0;
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
+ if (adev->userq_funcs[i])
+ userq_ip_mask |= (1 << i);
+ }
+
+ return userq_ip_mask;
+}
+
+static bool amdgpu_userq_is_reset_type_supported(struct amdgpu_device *adev,
+ enum amdgpu_ring_type ring_type, int reset_type)
+{
+
+ if (ring_type < 0 || ring_type >= AMDGPU_RING_TYPE_MAX)
+ return false;
+
+ switch (ring_type) {
+ case AMDGPU_RING_TYPE_GFX:
+ if (adev->gfx.gfx_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ if (adev->gfx.compute_supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ if (adev->sdma.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_DEC:
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ if (adev->vcn.supported_reset & reset_type)
+ return true;
+ break;
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ if (adev->jpeg.supported_reset & reset_type)
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+static void amdgpu_userq_gpu_reset(struct amdgpu_device *adev)
+{
+ if (amdgpu_device_should_recover_gpu(adev)) {
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->userq_reset_work);
+ /* Wait for the reset job to complete */
+ flush_work(&adev->userq_reset_work);
+ }
+}
+
+static int
+amdgpu_userq_detect_and_reset_queues(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const int queue_types[] = {
+ AMDGPU_RING_TYPE_COMPUTE,
+ AMDGPU_RING_TYPE_GFX,
+ AMDGPU_RING_TYPE_SDMA
+ };
+ const int num_queue_types = ARRAY_SIZE(queue_types);
+ bool gpu_reset = false;
+ int r = 0;
+ int i;
+
+ /* Warn if the current process mutex is not held */
+ WARN_ON(!mutex_is_locked(&uq_mgr->userq_mutex));
+
+ if (unlikely(adev->debug_disable_gpu_ring_reset)) {
+ dev_err(adev->dev, "userq reset disabled by debug mask\n");
+ return 0;
+ }
+
+ /*
+ * If GPU recovery feature is disabled system-wide,
+ * skip all reset detection logic
+ */
+ if (!amdgpu_gpu_recovery)
+ return 0;
+
+ /*
+ * Iterate through all queue types to detect and reset problematic queues
+ * Process each queue type in the defined order
+ */
+ for (i = 0; i < num_queue_types; i++) {
+ int ring_type = queue_types[i];
+ const struct amdgpu_userq_funcs *funcs = adev->userq_funcs[ring_type];
+
+ if (!amdgpu_userq_is_reset_type_supported(adev, ring_type, AMDGPU_RESET_TYPE_PER_QUEUE))
+ continue;
+
+ if (atomic_read(&uq_mgr->userq_count[ring_type]) > 0 &&
+ funcs && funcs->detect_and_reset) {
+ r = funcs->detect_and_reset(adev, ring_type);
+ if (r) {
+ gpu_reset = true;
+ break;
+ }
+ }
+ }
+
+ if (gpu_reset)
+ amdgpu_userq_gpu_reset(adev);
+
+ return r;
+}
+
+static int amdgpu_userq_buffer_va_list_add(struct amdgpu_usermode_queue *queue,
+ struct amdgpu_bo_va_mapping *va_map, u64 addr)
+{
+ struct amdgpu_userq_va_cursor *va_cursor;
+ struct userq_va_list;
+
+ va_cursor = kzalloc(sizeof(*va_cursor), GFP_KERNEL);
+ if (!va_cursor)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&va_cursor->list);
+ va_cursor->gpu_addr = addr;
+ atomic_set(&va_map->bo_va->userq_va_mapped, 1);
+ list_add(&va_cursor->list, &queue->userq_va_list);
+
+ return 0;
+}
+
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size)
+{
+ struct amdgpu_bo_va_mapping *va_map;
+ struct amdgpu_vm *vm = queue->vm;
+ u64 user_addr;
+ u64 size;
+ int r = 0;
+
+ user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+ size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!va_map) {
+ r = -EINVAL;
+ goto out_err;
+ }
+ /* Only validate whether the userq resides within the VM mapping range */
+ if (user_addr >= va_map->start &&
+ va_map->last - user_addr + 1 >= size) {
+ amdgpu_userq_buffer_va_list_add(queue, va_map, user_addr);
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+ }
+
+ r = -EINVAL;
+out_err:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return r;
+}
+
+static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ bool r;
+
+ if (amdgpu_bo_reserve(vm->root.bo, false))
+ return false;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+ if (!IS_ERR_OR_NULL(mapping) && atomic_read(&mapping->bo_va->userq_va_mapped))
+ r = true;
+ else
+ r = false;
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ return r;
+}
+
+static bool amdgpu_userq_buffer_vas_mapped(struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_va_cursor *va_cursor, *tmp;
+ int r = 0;
+
+ list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
+ r += amdgpu_userq_buffer_va_mapped(queue->vm, va_cursor->gpu_addr);
+ dev_dbg(queue->userq_mgr->adev->dev,
+ "validate the userq mapping:%p va:%llx r:%d\n",
+ queue, va_cursor->gpu_addr, r);
+ }
+
+ if (r != 0)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_userq_buffer_va_list_del(struct amdgpu_bo_va_mapping *mapping,
+ struct amdgpu_userq_va_cursor *va_cursor)
+{
+ atomic_set(&mapping->bo_va->userq_va_mapped, 0);
+ list_del(&va_cursor->list);
+ kfree(va_cursor);
+}
+
+static int amdgpu_userq_buffer_vas_list_cleanup(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_userq_va_cursor *va_cursor, *tmp;
+ struct amdgpu_bo_va_mapping *mapping;
+ int r;
+
+ r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+ if (r)
+ return r;
+
+ list_for_each_entry_safe(va_cursor, tmp, &queue->userq_va_list, list) {
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, va_cursor->gpu_addr);
+ if (!mapping) {
+ r = -EINVAL;
+ goto err;
+ }
+ dev_dbg(adev->dev, "delete the userq:%p va:%llx\n",
+ queue, va_cursor->gpu_addr);
+ amdgpu_userq_buffer_va_list_del(mapping, va_cursor);
+ }
+err:
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ return r;
+}
+
+static int
+amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ bool found_hung_queue = false;
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ r = userq_funcs->preempt(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ found_hung_queue = true;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
+ }
+ }
+
+ if (found_hung_queue)
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
+ return r;
+}
+
+static int
+amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
+ r = userq_funcs->restore(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ bool found_hung_queue = false;
+ int r = 0;
+
+ if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
+ (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
+ r = userq_funcs->unmap(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ found_hung_queue = true;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
+ }
+ }
+
+ if (found_hung_queue)
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+
+ return r;
+}
+
+static int
+amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
+ r = userq_funcs->map(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct dma_fence *f = queue->last_fence;
+ int ret = 0;
+
+ if (f && !dma_fence_is_signaled(f)) {
+ ret = dma_fence_wait_timeout(f, true, MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0) {
+ drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+ f->context, f->seqno);
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ return -ETIME;
+ }
+ }
+
+ return ret;
+}
+
+static void
+amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ int queue_id)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
+
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+
+ /* Drop the userq reference. */
+ amdgpu_userq_buffer_vas_list_cleanup(adev, queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ amdgpu_userq_fence_driver_free(queue);
+ /* Use interrupt-safe locking since IRQ handlers may access these XArrays */
+ xa_erase_irq(&uq_mgr->userq_mgr_xa, (unsigned long)queue_id);
+ xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
+ queue->userq_mgr = NULL;
+ list_del(&queue->userq_va_list);
+ kfree(queue);
+
+ up_read(&adev->reset_domain->sem);
+}
+
+static struct amdgpu_usermode_queue *
+amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
+{
+ return xa_load(&uq_mgr->userq_mgr_xa, qid);
+}
+
+void
+amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+retry:
+ /* Flush any pending resume work to create ev_fence */
+ flush_delayed_work(&uq_mgr->resume_work);
+
+ mutex_lock(&uq_mgr->userq_mutex);
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+ if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
+ mutex_unlock(&uq_mgr->userq_mutex);
+ /*
+ * Looks like there was no pending resume work,
+ * add one now to create a valid eviction fence
+ */
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
+ goto retry;
+ }
+}
+
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ bp.type = ttm_bo_type_kernel;
+ bp.size = size;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &userq_obj->obj);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r);
+ return r;
+ }
+
+ r = amdgpu_bo_reserve(userq_obj->obj, true);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r);
+ goto free_obj;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
+ goto unresv;
+ }
+
+ r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
+ goto unresv;
+ }
+
+ userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
+ amdgpu_bo_unreserve(userq_obj->obj);
+ memset(userq_obj->cpu_ptr, 0, size);
+ return 0;
+
+unresv:
+ amdgpu_bo_unreserve(userq_obj->obj);
+
+free_obj:
+ amdgpu_bo_unref(&userq_obj->obj);
+ return r;
+}
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj)
+{
+ amdgpu_bo_kunmap(userq_obj->obj);
+ amdgpu_bo_unref(&userq_obj->obj);
+}
+
+uint64_t
+amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_db_info *db_info,
+ struct drm_file *filp)
+{
+ uint64_t index;
+ struct drm_gem_object *gobj;
+ struct amdgpu_userq_obj *db_obj = db_info->db_obj;
+ int r, db_size;
+
+ gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
+ if (gobj == NULL) {
+ drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
+ return -EINVAL;
+ }
+
+ db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ drm_gem_object_put(gobj);
+
+ r = amdgpu_bo_reserve(db_obj->obj, true);
+ if (r) {
+ drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
+ goto unref_bo;
+ }
+
+ /* Pin the BO before generating the index, unpin in queue destroy */
+ r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
+ if (r) {
+ drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
+ goto unresv_bo;
+ }
+
+ switch (db_info->queue_type) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_HW_IP_COMPUTE:
+ case AMDGPU_HW_IP_DMA:
+ db_size = sizeof(u64);
+ break;
+ default:
+ drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
+ db_info->queue_type);
+ r = -EINVAL;
+ goto unpin_bo;
+ }
+
+ index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
+ db_info->doorbell_offset, db_size);
+ drm_dbg_driver(adev_to_drm(uq_mgr->adev),
+ "[Usermode queues] doorbell index=%lld\n", index);
+ amdgpu_bo_unreserve(db_obj->obj);
+ return index;
+
+unpin_bo:
+ amdgpu_bo_unpin(db_obj->obj);
+unresv_bo:
+ amdgpu_bo_unreserve(db_obj->obj);
+unref_bo:
+ amdgpu_bo_unref(&db_obj->obj);
+ return r;
+}
+
+static int
+amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_usermode_queue *queue;
+ int r = 0;
+
+ cancel_delayed_work_sync(&uq_mgr->resume_work);
+ mutex_lock(&uq_mgr->userq_mutex);
+
+ queue = amdgpu_userq_find(uq_mgr, queue_id);
+ if (!queue) {
+ drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
+ mutex_unlock(&uq_mgr->userq_mutex);
+ return -EINVAL;
+ }
+ amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
+ r = amdgpu_bo_reserve(queue->db_obj.obj, true);
+ if (!r) {
+ amdgpu_bo_unpin(queue->db_obj.obj);
+ amdgpu_bo_unreserve(queue->db_obj.obj);
+ }
+ amdgpu_bo_unref(&queue->db_obj.obj);
+ atomic_dec(&uq_mgr->userq_count[queue->queue_type]);
+#if defined(CONFIG_DEBUG_FS)
+ debugfs_remove_recursive(queue->debugfs_queue);
+#endif
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ /* TODO: a userq hw unmap error requires a reset */
+ if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
+ drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ }
+ amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
+ mutex_unlock(&uq_mgr->userq_mutex);
+
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
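+/*
+ * Creating a queue above normal priority is restricted to processes that
+ * are either CAP_SYS_NICE capable or the current DRM master.
+ */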
+static int amdgpu_userq_priority_permit(struct drm_file *filp,
+ int priority)
+{
+ if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH)
+ return 0;
+
+ if (capable(CAP_SYS_NICE))
+ return 0;
+
+ if (drm_is_current_master(filp))
+ return 0;
+
+ return -EACCES;
+}
+
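+/*
+ * Per-queue debugfs node ("mqd_info") created under the DRM client's
+ * debugfs directory, exposing the queue type and the GPU address of the MQD.
+ */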
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
+{
+ struct amdgpu_usermode_queue *queue = m->private;
+ struct amdgpu_bo *bo;
+ int r;
+
+ if (!queue || !queue->mqd.obj)
+ return -EINVAL;
+
+ bo = amdgpu_bo_ref(queue->mqd.obj);
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ amdgpu_bo_unref(&bo);
+ return -EINVAL;
+ }
+
+ seq_printf(m, "queue_type: %d\n", queue->queue_type);
+ seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj));
+
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return 0;
+}
+
+static int amdgpu_mqd_info_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, amdgpu_mqd_info_read, inode->i_private);
+}
+
+static const struct file_operations amdgpu_mqd_info_fops = {
+ .owner = THIS_MODULE,
+ .open = amdgpu_mqd_info_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
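+/*
+ * Queue creation path. In rough order: check priority permissions, take a
+ * runtime PM reference, make sure the eviction fence is valid, validate the
+ * queue/rptr/wptr VAs, resolve the doorbell index, allocate the fence
+ * driver, create the MQD via the IP specific mqd_create() callback,
+ * register the queue in the doorbell and per-process xarrays and finally
+ * map it on the HW (unless scheduling is halted for enforce isolation).
+ */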
+static int
+amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *uq_funcs;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_db_info db_info;
+ char *queue_name;
+ bool skip_map_queue;
+ u32 qid;
+ uint64_t index;
+ int r = 0;
+ int priority =
+ (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
+ AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
+
+ r = amdgpu_userq_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ /*
+ * We could be creating a new queue while the other queues under this
+ * uq_mgr are suspended, so if there is any resume work pending, wait
+ * for it to finish.
+ *
+ * This also makes sure we have a valid eviction fence ready to be used.
+ */
+ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+ uq_funcs = adev->userq_funcs[args->in.ip_type];
+ if (!uq_funcs) {
+ drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
+ args->in.ip_type);
+ r = -EINVAL;
+ goto unlock;
+ }
+
+ queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
+ if (!queue) {
+ drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
+ r = -ENOMEM;
+ goto unlock;
+ }
+
+ INIT_LIST_HEAD(&queue->userq_va_list);
+ queue->doorbell_handle = args->in.doorbell_handle;
+ queue->queue_type = args->in.ip_type;
+ queue->vm = &fpriv->vm;
+ queue->priority = priority;
+
+ db_info.queue_type = queue->queue_type;
+ db_info.doorbell_handle = queue->doorbell_handle;
+ db_info.db_obj = &queue->db_obj;
+ db_info.doorbell_offset = args->in.doorbell_offset;
+
+ /* Validate the userq virtual addresses. */
+ if (amdgpu_userq_input_va_validate(queue, args->in.queue_va, args->in.queue_size) ||
+ amdgpu_userq_input_va_validate(queue, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
+ amdgpu_userq_input_va_validate(queue, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
+ r = -EINVAL;
+ kfree(queue);
+ goto unlock;
+ }
+
+ /* Convert relative doorbell offset into absolute doorbell index */
+ index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
+ if (index == (uint64_t)-EINVAL) {
+ drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
+ kfree(queue);
+ r = -EINVAL;
+ goto unlock;
+ }
+
+ queue->doorbell_index = index;
+ xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+ r = amdgpu_userq_fence_driver_alloc(adev, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
+ goto unlock;
+ }
+
+ r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to create Queue\n");
+ amdgpu_userq_fence_driver_free(queue);
+ kfree(queue);
+ goto unlock;
+ }
+
+ /* Wait for mode-1 reset to complete */
+ down_read(&adev->reset_domain->sem);
+ r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
+ if (r) {
+ kfree(queue);
+ up_read(&adev->reset_domain->sem);
+ goto unlock;
+ }
+
+ r = xa_alloc(&uq_mgr->userq_mgr_xa, &qid, queue, XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
+ amdgpu_userq_fence_driver_free(queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ kfree(queue);
+ r = -ENOMEM;
+ up_read(&adev->reset_domain->sem);
+ goto unlock;
+ }
+ up_read(&adev->reset_domain->sem);
+ queue->userq_mgr = uq_mgr;
+
+ /* don't map the queue if scheduling is halted */
+ if (adev->userq_halt_for_enforce_isolation &&
+ ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
+ skip_map_queue = true;
+ else
+ skip_map_queue = false;
+ if (!skip_map_queue) {
+ r = amdgpu_userq_map_helper(uq_mgr, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to map Queue\n");
+ xa_erase(&uq_mgr->userq_mgr_xa, qid);
+ amdgpu_userq_fence_driver_free(queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ kfree(queue);
+ goto unlock;
+ }
+ }
+
+ queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid);
+ if (!queue_name) {
+ r = -ENOMEM;
+ goto unlock;
+ }
+
+#if defined(CONFIG_DEBUG_FS)
+ /* Queue dentry per client to hold MQD information */
+ queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client);
+ debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops);
+#endif
+ kfree(queue_name);
+
+ args->out.queue_id = qid;
+ atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
+
+unlock:
+ mutex_unlock(&uq_mgr->userq_mutex);
+
+ return r;
+}
+
+static int amdgpu_userq_input_args_validate(struct drm_device *dev,
+ union drm_amdgpu_userq *args,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
+ AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
+ return -EINVAL;
+ /* Usermode queues are currently only supported for the GFX, compute and SDMA IPs */
+ if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
+ args->in.ip_type != AMDGPU_HW_IP_DMA &&
+ args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
+ drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
+ args->in.ip_type);
+ return -EINVAL;
+ }
+
+ if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
+ (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
+ (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
+ !amdgpu_is_tmz(adev)) {
+ drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
+ return -EINVAL;
+ }
+
+ if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
+ args->in.queue_va == 0 ||
+ args->in.queue_size == 0) {
+ drm_file_err(filp, "invalidate userq queue va or size\n");
+ return -EINVAL;
+ }
+ if (!args->in.wptr_va || !args->in.rptr_va) {
+ drm_file_err(filp, "invalidate userq queue rptr or wptr\n");
+ return -EINVAL;
+ }
+ break;
+ case AMDGPU_USERQ_OP_FREE:
+ if (args->in.ip_type ||
+ args->in.doorbell_handle ||
+ args->in.doorbell_offset ||
+ args->in.flags ||
+ args->in.queue_va ||
+ args->in.queue_size ||
+ args->in.rptr_va ||
+ args->in.wptr_va ||
+ args->in.mqd ||
+ args->in.mqd_size)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
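+/*
+ * Illustrative userspace sketch for the create op (not part of this patch;
+ * assumes a libdrm-style ioctl wrapper and the uapi names from amdgpu_drm.h):
+ *
+ *   union drm_amdgpu_userq args = {};
+ *
+ *   args.in.op = AMDGPU_USERQ_OP_CREATE;
+ *   args.in.ip_type = AMDGPU_HW_IP_GFX;
+ *   args.in.doorbell_handle = db_handle;   // GEM handle of the doorbell BO
+ *   args.in.doorbell_offset = 0;
+ *   args.in.queue_va = ring_va;            // GPU VA of the ring buffer
+ *   args.in.queue_size = ring_size;
+ *   args.in.rptr_va = rptr_va;
+ *   args.in.wptr_va = wptr_va;
+ *   args.in.mqd = (uintptr_t)&mqd_in;      // IP specific MQD input
+ *   args.in.mqd_size = sizeof(mqd_in);
+ *   r = drmCommandWriteRead(fd, DRM_AMDGPU_USERQ, &args, sizeof(args));
+ *   queue_id = args.out.queue_id;
+ */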
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_userq *args = data;
+ int r;
+
+ if (amdgpu_userq_input_args_validate(dev, args, filp) < 0)
+ return -EINVAL;
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ r = amdgpu_userq_create(filp, args);
+ if (r)
+ drm_file_err(filp, "Failed to create usermode queue\n");
+ break;
+
+ case AMDGPU_USERQ_OP_FREE:
+ r = amdgpu_userq_destroy(filp, args->in.queue_id);
+ if (r)
+ drm_file_err(filp, "Failed to destroy usermode queue\n");
+ break;
+
+ default:
+ drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ /* Resume all the queues for this process */
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+
+ if (!amdgpu_userq_buffer_vas_mapped(queue)) {
+ drm_file_err(uq_mgr->file,
+ "trying restore queue without va mapping\n");
+ queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
+ continue;
+ }
+
+ r = amdgpu_userq_restore_helper(uq_mgr, queue);
+ if (r)
+ ret = r;
+ }
+
+ if (ret)
+ drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
+ return ret;
+}
+
+static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/* Handle all BOs on the invalidated list, validate them and update the PTs */
+static int
+amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
+ struct amdgpu_vm *vm)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated,
+ struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_va->base.bo;
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
+ if (unlikely(ret))
+ return ret;
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ return ret;
+
+ /* This moves the bo_va to the done list */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret)
+ return ret;
+
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/* Make sure the whole VM is ready to be used */
+static int
+amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ bool invalidated = false, new_addition = false;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_hmm_range *range;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ unsigned long key, tmp_key;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ struct drm_exec exec;
+ struct xarray xa;
+ int ret;
+
+ xa_init(&xa);
+
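+ /*
+ * Userptr BOs cannot be faulted in while the reservation locks are
+ * held: the loop below first collects an HMM range per userptr BO on
+ * the done list, then drops all locks, gets the user pages and jumps
+ * back to retry_lock to revalidate everything with the fresh pages.
+ */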
+retry_lock:
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ ret = amdgpu_vm_lock_pd(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ ret = amdgpu_vm_lock_done_list(vm, &exec, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ /* This validates PDs, PTs and per VM BOs */
+ ret = amdgpu_vm_validate(adev, vm, NULL,
+ amdgpu_userq_validate_vm,
+ NULL);
+ if (unlikely(ret))
+ goto unlock_all;
+
+ /* This locks and validates the remaining evicted BOs */
+ ret = amdgpu_userq_bo_validate(adev, &exec, vm);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+ }
+
+ if (invalidated) {
+ xa_for_each(&xa, tmp_key, range) {
+ bo = range->bo;
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+ }
+ invalidated = false;
+ }
+
+ ret = amdgpu_vm_handle_moved(adev, vm, NULL);
+ if (ret)
+ goto unlock_all;
+
+ key = 0;
+ /* Validate User Ptr BOs */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+ bo = bo_va->base.bo;
+ if (!bo)
+ continue;
+
+ if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
+ continue;
+
+ range = xa_load(&xa, key);
+ if (range && range->bo != bo) {
+ xa_erase(&xa, key);
+ amdgpu_hmm_range_free(range);
+ range = NULL;
+ }
+
+ if (!range) {
+ range = amdgpu_hmm_range_alloc(bo);
+ if (!range) {
+ ret = -ENOMEM;
+ goto unlock_all;
+ }
+
+ xa_store(&xa, key, range, GFP_KERNEL);
+ new_addition = true;
+ }
+ key++;
+ }
+
+ if (new_addition) {
+ drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->bo;
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (ret)
+ goto unlock_all;
+ }
+
+ invalidated = true;
+ new_addition = false;
+ goto retry_lock;
+ }
+
+ ret = amdgpu_vm_update_pdes(adev, vm, false);
+ if (ret)
+ goto unlock_all;
+
+ /*
+ * We need to wait for all VM updates to finish before restarting the
+ * queues. Walking the done list here is fine since everything is
+ * locked in place.
+ */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status)
+ dma_fence_wait(bo_va->last_pt_update, false);
+ dma_fence_wait(vm->last_update, false);
+
+ ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+ if (ret)
+ drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
+
+unlock_all:
+ drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->bo;
+ amdgpu_hmm_range_free(range);
+ }
+ xa_destroy(&xa);
+ return ret;
+}
+
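+/*
+ * Deferred restore work: runs after an eviction has completed, revalidates
+ * the whole VM (including userptr BOs) and maps all of the process' queues
+ * back onto the HW.
+ */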
+static void amdgpu_userq_restore_worker(struct work_struct *work)
+{
+ struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ int ret;
+
+ flush_delayed_work(&fpriv->evf_mgr.suspend_work);
+
+ mutex_lock(&uq_mgr->userq_mutex);
+
+ ret = amdgpu_userq_vm_validate(uq_mgr);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
+ goto unlock;
+ }
+
+ ret = amdgpu_userq_restore_all(uq_mgr);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
+ goto unlock;
+ }
+
+unlock:
+ mutex_unlock(&uq_mgr->userq_mutex);
+}
+
+static int
+amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ amdgpu_userq_detect_and_reset_queues(uq_mgr);
+ /* Try to unmap all the queues in this process ctx */
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+ r = amdgpu_userq_preempt_helper(uq_mgr, queue);
+ if (r)
+ ret = r;
+ }
+
+ if (ret)
+ drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
+ return ret;
+}
+
+void amdgpu_userq_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ userq_reset_work);
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ reset_context.src = AMDGPU_RESET_SRC_USERQ;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /*set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);*/
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
+static int
+amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+ int ret;
+
+ xa_for_each(&uq_mgr->userq_mgr_xa, queue_id, queue) {
+ struct dma_fence *f = queue->last_fence;
+
+ if (!f || dma_fence_is_signaled(f))
+ continue;
+ ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
+ if (ret <= 0) {
+ drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+ f->context, f->seqno);
+ return -ETIMEDOUT;
+ }
+ }
+
+ return 0;
+}
+
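+/*
+ * Evicts all user queues of a process: waits for pending userq fences,
+ * preempts the queues, signals the given eviction fence and, unless the fd
+ * is closing, schedules the restore work.
+ */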
+void
+amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence)
+{
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ int ret;
+
+ /* Wait for any pending userqueue fence work to finish */
+ ret = amdgpu_userq_wait_for_signal(uq_mgr);
+ if (ret)
+ dev_err(adev->dev, "Not evicting userqueue, timeout waiting for work\n");
+
+ ret = amdgpu_userq_evict_all(uq_mgr);
+ if (ret)
+ dev_err(adev->dev, "Failed to evict userqueue\n");
+
+ /* Signal current eviction fence */
+ amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
+
+ if (evf_mgr->fd_closing) {
+ cancel_delayed_work_sync(&uq_mgr->resume_work);
+ return;
+ }
+
+ /* Schedule a resume work */
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
+}
+
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev)
+{
+ mutex_init(&userq_mgr->userq_mutex);
+ xa_init_flags(&userq_mgr->userq_mgr_xa, XA_FLAGS_ALLOC);
+ userq_mgr->adev = adev;
+ userq_mgr->file = file_priv;
+
+ INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
+ return 0;
+}
+
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ unsigned long queue_id;
+
+ cancel_delayed_work_sync(&userq_mgr->resume_work);
+
+ mutex_lock(&userq_mgr->userq_mutex);
+ amdgpu_userq_detect_and_reset_queues(userq_mgr);
+ xa_for_each(&userq_mgr->userq_mgr_xa, queue_id, queue) {
+ amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
+ amdgpu_userq_unmap_helper(userq_mgr, queue);
+ amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
+ }
+
+ xa_destroy(&userq_mgr->userq_mgr_xa);
+ mutex_unlock(&userq_mgr->userq_mutex);
+ mutex_destroy(&userq_mgr->userq_mutex);
+}
+
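+/*
+ * System suspend/resume handlers: walk the global doorbell xarray and
+ * preempt (S0ix) or unmap (full suspend) every user queue, then restore or
+ * map them again on resume.
+ */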
+int amdgpu_userq_suspend(struct amdgpu_device *adev)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
+
+ if (!ip_mask)
+ return 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ guard(mutex)(&uqm->userq_mutex);
+ amdgpu_userq_detect_and_reset_queues(uqm);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ else
+ r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+int amdgpu_userq_resume(struct amdgpu_device *adev)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int r;
+
+ if (!ip_mask)
+ return 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ guard(mutex)(&uqm->userq_mutex);
+ if (adev->in_s0ix)
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ else
+ r = amdgpu_userq_map_helper(uqm, queue);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ /* only need to stop gfx/compute */
+ if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
+ return 0;
+
+ if (adev->userq_halt_for_enforce_isolation)
+ dev_warn(adev->dev, "userq scheduling already stopped!\n");
+ adev->userq_halt_for_enforce_isolation = true;
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ mutex_lock(&uqm->userq_mutex);
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ amdgpu_userq_detect_and_reset_queues(uqm);
+ r = amdgpu_userq_preempt_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+
+ return ret;
+}
+
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+ int ret = 0, r;
+
+ /* only need to start gfx/compute */
+ if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
+ return 0;
+
+ if (!adev->userq_halt_for_enforce_isolation)
+ dev_warn(adev->dev, "userq scheduling already started!\n");
+ adev->userq_halt_for_enforce_isolation = false;
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ mutex_lock(&uqm->userq_mutex);
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ r = amdgpu_userq_restore_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+
+ return ret;
+}
+
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_bo_va *bo_va = mapping->bo_va;
+ struct dma_resv *resv = bo_va->base.bo->tbo.base.resv;
+ int ret = 0;
+
+ if (!ip_mask)
+ return 0;
+
+ dev_warn_once(adev->dev, "now unmapping a vital queue va:%llx\n", saddr);
+ /*
+ * The reservation of the userq VA mapping should include the eviction
+ * fence. If the eviction fence does not signal during the unmap, the
+ * driver warns to flag this improper unmap of the userq VA.
+ * Note: the eviction fence may be attached to several BOs while this
+ * unmap only covers one kind of userq VA, so assume the eviction
+ * fence is still unsignaled at this point.
+ */
+ if (!dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) {
+ ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret <= 0)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
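+/*
+ * GPU reset handling: before the reset every mapped queue is unmapped,
+ * marked hung and has its fences force-completed; after the reset the
+ * queues are mapped again if VRAM contents survived.
+ */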
+void amdgpu_userq_pre_reset(struct amdgpu_device *adev)
+{
+ const struct amdgpu_userq_funcs *userq_funcs;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm;
+ unsigned long queue_id;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ cancel_delayed_work_sync(&uqm->resume_work);
+ if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ amdgpu_userq_wait_for_last_fence(uqm, queue);
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ userq_funcs->unmap(uqm, queue);
+ /* Just mark all queues as hung at this point. If the unmap
+ * succeeded, they can be mapped again in
+ * amdgpu_userq_post_reset() if VRAM was not lost.
+ */
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ amdgpu_userq_fence_driver_force_completion(queue);
+ }
+ }
+}
+
+int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost)
+{
+ /* Queues marked AMDGPU_USERQ_STATE_HUNG in amdgpu_userq_pre_reset()
+ * can be mapped again at this point and continue running if VRAM
+ * contents were not lost.
+ */
+ struct amdgpu_userq_mgr *uqm;
+ struct amdgpu_usermode_queue *queue;
+ const struct amdgpu_userq_funcs *userq_funcs;
+ unsigned long queue_id;
+ int r = 0;
+
+ xa_for_each(&adev->userq_doorbell_xa, queue_id, queue) {
+ uqm = queue->userq_mgr;
+ if (queue->state == AMDGPU_USERQ_STATE_HUNG && !vram_lost) {
+ userq_funcs = adev->userq_funcs[queue->queue_type];
+ /* Re-map queue */
+ r = userq_funcs->map(uqm, queue);
+ if (r) {
+ dev_err(adev->dev, "Failed to remap queue %ld\n", queue_id);
+ continue;
+ }
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
new file mode 100644
index 000000000000..c37444427a14
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_USERQ_H_
+#define AMDGPU_USERQ_H_
+#include "amdgpu_eviction_fence.h"
+
+#define AMDGPU_MAX_USERQ_COUNT 512
+
+#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
+#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
+#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
+
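+/*
+ * Queue life cycle, roughly as used by the map/unmap/preempt/restore
+ * helpers: UNMAPPED (not on HW), MAPPED (active on HW), PREEMPTED (taken
+ * off HW, restore pending), HUNG (needs a reset before it can be mapped
+ * again), INVALID_VA (backing VAs were unmapped, queue cannot be restored).
+ */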
+enum amdgpu_userq_state {
+ AMDGPU_USERQ_STATE_UNMAPPED = 0,
+ AMDGPU_USERQ_STATE_MAPPED,
+ AMDGPU_USERQ_STATE_PREEMPTED,
+ AMDGPU_USERQ_STATE_HUNG,
+ AMDGPU_USERQ_STATE_INVALID_VA,
+};
+
+struct amdgpu_mqd_prop;
+
+struct amdgpu_userq_obj {
+ void *cpu_ptr;
+ uint64_t gpu_addr;
+ struct amdgpu_bo *obj;
+};
+
+struct amdgpu_userq_va_cursor {
+ u64 gpu_addr;
+ struct list_head list;
+};
+
+struct amdgpu_usermode_queue {
+ int queue_type;
+ enum amdgpu_userq_state state;
+ uint64_t doorbell_handle;
+ uint64_t doorbell_index;
+ uint64_t flags;
+ struct amdgpu_mqd_prop *userq_prop;
+ struct amdgpu_userq_mgr *userq_mgr;
+ struct amdgpu_vm *vm;
+ struct amdgpu_userq_obj mqd;
+ struct amdgpu_userq_obj db_obj;
+ struct amdgpu_userq_obj fw_obj;
+ struct amdgpu_userq_obj wptr_obj;
+ struct xarray fence_drv_xa;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *last_fence;
+ u32 xcp_id;
+ int priority;
+ struct dentry *debugfs_queue;
+
+ struct list_head userq_va_list;
+};
+
+struct amdgpu_userq_funcs {
+ int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args,
+ struct amdgpu_usermode_queue *queue);
+ void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *uq);
+ int (*unmap)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*map)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*preempt)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*restore)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*detect_and_reset)(struct amdgpu_device *adev,
+ int queue_type);
+};
+
+/* Usermode queues for gfx */
+struct amdgpu_userq_mgr {
+ /**
+ * @userq_mgr_xa: Per-process user queue map (queue ID → queue)
+ * Key: queue_id (unique ID within the process's userq manager)
+ * Value: struct amdgpu_usermode_queue
+ */
+ struct xarray userq_mgr_xa;
+ struct mutex userq_mutex;
+ struct amdgpu_device *adev;
+ struct delayed_work resume_work;
+ struct drm_file *file;
+ atomic_t userq_count[AMDGPU_RING_TYPE_MAX];
+};
+
+struct amdgpu_db_info {
+ uint64_t doorbell_handle;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ struct amdgpu_userq_obj *db_obj;
+};
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev);
+
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
+
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size);
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj);
+
+void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_db_info *db_info,
+ struct drm_file *filp);
+
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev);
+
+int amdgpu_userq_suspend(struct amdgpu_device *adev);
+int amdgpu_userq_resume(struct amdgpu_device *adev);
+
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+void amdgpu_userq_reset_work(struct work_struct *work);
+void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
+int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
+
+int amdgpu_userq_input_va_validate(struct amdgpu_usermode_queue *queue,
+ u64 addr, u64 expected_size);
+int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t saddr);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
new file mode 100644
index 000000000000..eba9fb359047
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -0,0 +1,1011 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/dma-fence-unwrap.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_syncobj.h>
+
+#include "amdgpu.h"
+#include "amdgpu_userq_fence.h"
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops;
+static struct kmem_cache *amdgpu_userq_fence_slab;
+
+int amdgpu_userq_fence_slab_init(void)
+{
+ amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
+ sizeof(struct amdgpu_userq_fence),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!amdgpu_userq_fence_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void amdgpu_userq_fence_slab_fini(void)
+{
+ rcu_barrier();
+ kmem_cache_destroy(amdgpu_userq_fence_slab);
+}
+
+static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
+{
+ if (!f || f->ops != &amdgpu_userq_fence_ops)
+ return NULL;
+
+ return container_of(f, struct amdgpu_userq_fence, base);
+}
+
+static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ return le64_to_cpu(*fence_drv->cpu_addr);
+}
+
+static void
+amdgpu_userq_fence_write(struct amdgpu_userq_fence_driver *fence_drv,
+ u64 seq)
+{
+ if (fence_drv->cpu_addr)
+ *fence_drv->cpu_addr = cpu_to_le64(seq);
+}
+
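+/*
+ * Per-queue fence driver: allocates a 64bit seq64 slot that completed fence
+ * values are written to, and registers the driver in the device wide
+ * userq_xa so it can be looked up by doorbell index when fences need to be
+ * processed.
+ */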
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ unsigned long flags;
+ int r;
+
+ fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
+ if (!fence_drv)
+ return -ENOMEM;
+
+ /* Acquire seq64 memory */
+ r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
+ &fence_drv->cpu_addr);
+ if (r)
+ goto free_fence_drv;
+
+ memset(fence_drv->cpu_addr, 0, sizeof(u64));
+
+ kref_init(&fence_drv->refcount);
+ INIT_LIST_HEAD(&fence_drv->fences);
+ spin_lock_init(&fence_drv->fence_list_lock);
+
+ fence_drv->adev = adev;
+ fence_drv->context = dma_fence_context_alloc(1);
+ get_task_comm(fence_drv->timeline_name, current);
+
+ xa_lock_irqsave(&adev->userq_xa, flags);
+ r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
+ fence_drv, GFP_KERNEL));
+ xa_unlock_irqrestore(&adev->userq_xa, flags);
+ if (r)
+ goto free_seq64;
+
+ userq->fence_drv = fence_drv;
+
+ return 0;
+
+free_seq64:
+ amdgpu_seq64_free(adev, fence_drv->va);
+free_fence_drv:
+ kfree(fence_drv);
+
+ return r;
+}
+
+static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ unsigned long index;
+
+ if (xa_empty(xa))
+ return;
+
+ xa_lock(xa);
+ xa_for_each(xa, index, fence_drv) {
+ __xa_erase(xa, index);
+ amdgpu_userq_fence_driver_put(fence_drv);
+ }
+
+ xa_unlock(xa);
+}
+
+void
+amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
+{
+ amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
+ xa_destroy(&userq->fence_drv_xa);
+ /* Drop the fence_drv reference held by user queue */
+ amdgpu_userq_fence_driver_put(userq->fence_drv);
+}
+
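+/*
+ * Signal all fences whose seqno is not above the value currently stored in
+ * the seq64 slot, dropping the per-fence references on the tracked fence
+ * drivers.
+ */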
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ struct amdgpu_userq_fence *userq_fence, *tmp;
+ struct dma_fence *fence;
+ unsigned long flags;
+ u64 rptr;
+ int i;
+
+ if (!fence_drv)
+ return;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ rptr = amdgpu_userq_fence_read(fence_drv);
+
+ list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
+ fence = &userq_fence->base;
+
+ if (rptr < fence->seqno)
+ break;
+
+ dma_fence_signal(fence);
+
+ for (i = 0; i < userq_fence->fence_drv_array_count; i++)
+ amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
+
+ list_del(&userq_fence->link);
+ dma_fence_put(fence);
+ }
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+}
+
+void amdgpu_userq_fence_driver_destroy(struct kref *ref)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
+ struct amdgpu_userq_fence_driver,
+ refcount);
+ struct amdgpu_userq_fence_driver *xa_fence_drv;
+ struct amdgpu_device *adev = fence_drv->adev;
+ struct amdgpu_userq_fence *fence, *tmp;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long index, flags;
+ struct dma_fence *f;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
+ f = &fence->base;
+
+ if (!dma_fence_is_signaled(f)) {
+ dma_fence_set_error(f, -ECANCELED);
+ dma_fence_signal(f);
+ }
+
+ list_del(&fence->link);
+ dma_fence_put(f);
+ }
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ xa_lock_irqsave(xa, flags);
+ xa_for_each(xa, index, xa_fence_drv)
+ if (xa_fence_drv == fence_drv)
+ __xa_erase(xa, index);
+ xa_unlock_irqrestore(xa, flags);
+
+ /* Free seq64 memory */
+ amdgpu_seq64_free(adev, fence_drv->va);
+ kfree(fence_drv);
+}
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_get(&fence_drv->refcount);
+}
+
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
+}
+
+static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
+{
+ *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
+ return *userq_fence ? 0 : -ENOMEM;
+}
+
+static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence *userq_fence,
+ u64 seq, struct dma_fence **f)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *fence;
+ unsigned long flags;
+
+ fence_drv = userq->fence_drv;
+ if (!fence_drv)
+ return -EINVAL;
+
+ spin_lock_init(&userq_fence->lock);
+ INIT_LIST_HEAD(&userq_fence->link);
+ fence = &userq_fence->base;
+ userq_fence->fence_drv = fence_drv;
+
+ dma_fence_init64(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+ fence_drv->context, seq);
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+ dma_fence_get(fence);
+
+ if (!xa_empty(&userq->fence_drv_xa)) {
+ struct amdgpu_userq_fence_driver *stored_fence_drv;
+ unsigned long index, count = 0;
+ int i = 0;
+
+ xa_lock(&userq->fence_drv_xa);
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
+ count++;
+
+ userq_fence->fence_drv_array =
+ kvmalloc_array(count,
+ sizeof(struct amdgpu_userq_fence_driver *),
+ GFP_ATOMIC);
+
+ if (userq_fence->fence_drv_array) {
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
+ userq_fence->fence_drv_array[i] = stored_fence_drv;
+ __xa_erase(&userq->fence_drv_xa, index);
+ i++;
+ }
+ }
+
+ userq_fence->fence_drv_array_count = i;
+ xa_unlock(&userq->fence_drv_xa);
+ } else {
+ userq_fence->fence_drv_array = NULL;
+ userq_fence->fence_drv_array_count = 0;
+ }
+
+ /* Check if hardware has already processed the job */
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ if (!dma_fence_is_signaled(fence))
+ list_add_tail(&userq_fence->link, &fence_drv->fences);
+ else
+ dma_fence_put(fence);
+
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ *f = fence;
+
+ return 0;
+}
+
+static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_userq_fence";
+}
+
+static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+
+ return fence->fence_drv->timeline_name;
+}
+
+static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ u64 rptr, wptr;
+
+ rptr = amdgpu_userq_fence_read(fence_drv);
+ wptr = fence->base.seqno;
+
+ if (rptr >= wptr)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_userq_fence_free(struct rcu_head *rcu)
+{
+ struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
+ struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
+ struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
+
+ /* Release the fence driver reference */
+ amdgpu_userq_fence_driver_put(fence_drv);
+
+ kvfree(userq_fence->fence_drv_array);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+}
+
+static void amdgpu_userq_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_userq_fence_free);
+}
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops = {
+ .get_driver_name = amdgpu_userq_fence_get_driver_name,
+ .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
+ .signaled = amdgpu_userq_fence_signaled,
+ .release = amdgpu_userq_fence_release,
+};
+
+/**
+ * amdgpu_userq_fence_read_wptr - Read the userq wptr value
+ *
+ * @queue: user mode queue structure pointer
+ * @wptr: returned write pointer value
+ *
+ * Read the wptr value from userq's MQD. The userq signal IOCTL
+ * creates a dma_fence for the shared buffers that expects the
+ * RPTR value written to seq64 memory >= WPTR.
+ *
+ * Returns 0 on success with the wptr value stored in @wptr, negative error code on failure.
+ */
+static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
+ u64 *wptr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ u64 addr, *ptr;
+ int r;
+
+ r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+ if (r)
+ return r;
+
+ addr = queue->userq_prop->wptr_gpu_addr;
+ addr &= AMDGPU_GMC_HOLE_MASK;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
+ if (!mapping) {
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
+ return -EINVAL;
+ }
+
+ bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ amdgpu_bo_unref(&bo);
+ DRM_ERROR("Failed to reserve userqueue wptr bo");
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(bo, (void **)&ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the userqueue wptr bo");
+ goto map_error;
+ }
+
+ *wptr = le64_to_cpu(*ptr);
+
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return 0;
+
+map_error:
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return r;
+}
+
+static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
+{
+ dma_fence_put(fence);
+}
+
+static void
+amdgpu_userq_fence_driver_set_error(struct amdgpu_userq_fence *fence,
+ int error)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ unsigned long flags;
+ struct dma_fence *f;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+
+ f = rcu_dereference_protected(&fence->base,
+ lockdep_is_held(&fence_drv->fence_list_lock));
+ if (f && !dma_fence_is_signaled_locked(f))
+ dma_fence_set_error(f, error);
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+}
+
+void
+amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq)
+{
+ struct dma_fence *f = userq->last_fence;
+
+ if (f) {
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ u64 wptr = fence->base.seqno;
+
+ amdgpu_userq_fence_driver_set_error(fence, -ECANCELED);
+ amdgpu_userq_fence_write(fence_drv, wptr);
+ amdgpu_userq_fence_driver_process(fence_drv);
+
+ }
+}
+
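+/*
+ * USERQ_SIGNAL ioctl: creates a userq fence for the current wptr of the
+ * queue and attaches it to the supplied syncobjs as well as to the
+ * reservation objects of the supplied read/write GEM handles (with
+ * DMA_RESV_USAGE_READ/WRITE respectively), so other submissions can
+ * synchronize against user mode submissions.
+ */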
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+ struct drm_amdgpu_userq_signal *args = data;
+ struct drm_gem_object **gobj_write = NULL;
+ struct drm_gem_object **gobj_read = NULL;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_fence *userq_fence;
+ struct drm_syncobj **syncobj = NULL;
+ u32 *bo_handles_write, num_write_bo_handles;
+ u32 *syncobj_handles, num_syncobj_handles;
+ u32 *bo_handles_read, num_read_bo_handles;
+ int r, i, entry, rentry, wentry;
+ struct dma_fence *fence;
+ struct drm_exec exec;
+ u64 wptr;
+
+ num_syncobj_handles = args->num_syncobj_handles;
+ syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
+ size_mul(sizeof(u32), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ /* Array of pointers to the looked up syncobjs */
+ syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
+ if (!syncobj) {
+ r = -ENOMEM;
+ goto free_syncobj_handles;
+ }
+
+ for (entry = 0; entry < num_syncobj_handles; entry++) {
+ syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
+ if (!syncobj[entry]) {
+ r = -ENOENT;
+ goto free_syncobj;
+ }
+ }
+
+ num_read_bo_handles = args->num_bo_read_handles;
+ bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
+ sizeof(u32) * num_read_bo_handles);
+ if (IS_ERR(bo_handles_read)) {
+ r = PTR_ERR(bo_handles_read);
+ goto free_syncobj;
+ }
+
+ /* Array of pointers to the GEM read objects */
+ gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+ if (!gobj_read) {
+ r = -ENOMEM;
+ goto free_bo_handles_read;
+ }
+
+ for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+ gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+ if (!gobj_read[rentry]) {
+ r = -ENOENT;
+ goto put_gobj_read;
+ }
+ }
+
+ num_write_bo_handles = args->num_bo_write_handles;
+ bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
+ sizeof(u32) * num_write_bo_handles);
+ if (IS_ERR(bo_handles_write)) {
+ r = PTR_ERR(bo_handles_write);
+ goto put_gobj_read;
+ }
+
+ /* Array of pointers to the GEM write objects */
+ gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+ if (!gobj_write) {
+ r = -ENOMEM;
+ goto free_bo_handles_write;
+ }
+
+ for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+ gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+ if (!gobj_write[wentry]) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+ }
+
+ /* Retrieve the user queue */
+ queue = xa_load(&userq_mgr->userq_mgr_xa, args->queue_id);
+ if (!queue) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+
+ r = amdgpu_userq_fence_read_wptr(queue, &wptr);
+ if (r)
+ goto put_gobj_write;
+
+ r = amdgpu_userq_fence_alloc(&userq_fence);
+ if (r)
+ goto put_gobj_write;
+
+ /* Reaching here means the UQ is active; make sure the eviction fence is valid */
+ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+ /* Create a new fence */
+ r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
+ if (r) {
+ mutex_unlock(&userq_mgr->userq_mutex);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+ goto put_gobj_write;
+ }
+
+ dma_fence_put(queue->last_fence);
+ queue->last_fence = dma_fence_get(fence);
+ mutex_unlock(&userq_mgr->userq_mutex);
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ (num_read_bo_handles + num_write_bo_handles));
+
+ /* Lock all BOs with retry handling */
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ amdgpu_userq_fence_cleanup(fence);
+ goto exec_fini;
+ }
+
+ r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ amdgpu_userq_fence_cleanup(fence);
+ goto exec_fini;
+ }
+ }
+
+ for (i = 0; i < num_read_bo_handles; i++) {
+ if (!gobj_read || !gobj_read[i]->resv)
+ continue;
+
+ dma_resv_add_fence(gobj_read[i]->resv, fence,
+ DMA_RESV_USAGE_READ);
+ }
+
+ for (i = 0; i < num_write_bo_handles; i++) {
+ if (!gobj_write || !gobj_write[i]->resv)
+ continue;
+
+ dma_resv_add_fence(gobj_write[i]->resv, fence,
+ DMA_RESV_USAGE_WRITE);
+ }
+
+ /* Add the created fence to syncobj/BO's */
+ for (i = 0; i < num_syncobj_handles; i++)
+ drm_syncobj_replace_fence(syncobj[i], fence);
+
+ /* drop the reference acquired in fence creation function */
+ dma_fence_put(fence);
+
+exec_fini:
+ drm_exec_fini(&exec);
+put_gobj_write:
+ while (wentry-- > 0)
+ drm_gem_object_put(gobj_write[wentry]);
+ kfree(gobj_write);
+free_bo_handles_write:
+ kfree(bo_handles_write);
+put_gobj_read:
+ while (rentry-- > 0)
+ drm_gem_object_put(gobj_read[rentry]);
+ kfree(gobj_read);
+free_bo_handles_read:
+ kfree(bo_handles_read);
+free_syncobj:
+ while (entry-- > 0)
+ if (syncobj[entry])
+ drm_syncobj_put(syncobj[entry]);
+ kfree(syncobj);
+free_syncobj_handles:
+ kfree(syncobj_handles);
+
+ return r;
+}
+
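+/*
+ * USERQ_WAIT ioctl: collects the fences attached to the supplied BOs,
+ * syncobjs and timeline points. It is expected to be called twice: first
+ * with num_fences == 0 to query how many fences there are, then with a
+ * suitably sized array to receive, for each userq fence, the seq64 GPU
+ * address and value the user mode queue has to wait for. Foreign fences
+ * are simply waited on in the kernel.
+ */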
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
+ u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
+ struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+ struct drm_amdgpu_userq_wait *wait_info = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_usermode_queue *waitq;
+ struct drm_gem_object **gobj_write;
+ struct drm_gem_object **gobj_read;
+ struct dma_fence **fences = NULL;
+ u16 num_points, num_fences = 0;
+ int r, i, rentry, wentry, cnt;
+ struct drm_exec exec;
+
+ num_read_bo_handles = wait_info->num_bo_read_handles;
+ bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
+ size_mul(sizeof(u32), num_read_bo_handles));
+ if (IS_ERR(bo_handles_read))
+ return PTR_ERR(bo_handles_read);
+
+ num_write_bo_handles = wait_info->num_bo_write_handles;
+ bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
+ size_mul(sizeof(u32), num_write_bo_handles));
+ if (IS_ERR(bo_handles_write)) {
+ r = PTR_ERR(bo_handles_write);
+ goto free_bo_handles_read;
+ }
+
+ num_syncobj = wait_info->num_syncobj_handles;
+ syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
+ size_mul(sizeof(u32), num_syncobj));
+ if (IS_ERR(syncobj_handles)) {
+ r = PTR_ERR(syncobj_handles);
+ goto free_bo_handles_write;
+ }
+
+ num_points = wait_info->num_syncobj_timeline_handles;
+ timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
+ sizeof(u32) * num_points);
+ if (IS_ERR(timeline_handles)) {
+ r = PTR_ERR(timeline_handles);
+ goto free_syncobj_handles;
+ }
+
+ timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
+ sizeof(u32) * num_points);
+ if (IS_ERR(timeline_points)) {
+ r = PTR_ERR(timeline_points);
+ goto free_timeline_handles;
+ }
+
+ gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+ if (!gobj_read) {
+ r = -ENOMEM;
+ goto free_timeline_points;
+ }
+
+ for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+ gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+ if (!gobj_read[rentry]) {
+ r = -ENOENT;
+ goto put_gobj_read;
+ }
+ }
+
+ gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+ if (!gobj_write) {
+ r = -ENOMEM;
+ goto put_gobj_read;
+ }
+
+ for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+ gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+ if (!gobj_write[wentry]) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+ }
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ (num_read_bo_handles + num_write_bo_handles));
+
+ /* Lock all BOs with retry handling */
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ drm_exec_fini(&exec);
+ goto put_gobj_write;
+ }
+
+ r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ drm_exec_fini(&exec);
+ goto put_gobj_write;
+ }
+ }
+
+ if (!wait_info->num_fences) {
+ if (num_points) {
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence;
+ struct dma_fence *f;
+
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto exec_fini;
+
+ dma_fence_unwrap_for_each(f, &iter, fence)
+ num_fences++;
+
+ dma_fence_put(fence);
+ }
+ }
+
+ /* Count the syncobj fences */
+ for (i = 0; i < num_syncobj; i++) {
+ struct dma_fence *fence;
+
+ r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+ 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto exec_fini;
+
+ num_fences++;
+ dma_fence_put(fence);
+ }
+
+ /* Count the GEM object fences */
+ for (i = 0; i < num_read_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+ DMA_RESV_USAGE_READ, fence)
+ num_fences++;
+ }
+
+ for (i = 0; i < num_write_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+ DMA_RESV_USAGE_WRITE, fence)
+ num_fences++;
+ }
+
+ /*
+ * Passing num_fences = 0 means that userspace doesn't want to
+ * retrieve userq_fence_info. If num_fences = 0 we skip filling
+ * userq_fence_info and return the actual number of fences in
+ * wait_info->num_fences.
+ */
+ wait_info->num_fences = num_fences;
+ } else {
+ /* Array of fence info */
+ fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
+ if (!fence_info) {
+ r = -ENOMEM;
+ goto exec_fini;
+ }
+
+ /* Array of fences */
+ fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
+ if (!fences) {
+ r = -ENOMEM;
+ goto free_fence_info;
+ }
+
+ /* Retrieve GEM read objects fence */
+ for (i = 0; i < num_read_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+ DMA_RESV_USAGE_READ, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ dma_fence_get(fence);
+ }
+ }
+
+ /* Retrieve GEM write objects fence */
+ for (i = 0; i < num_write_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+ DMA_RESV_USAGE_WRITE, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ dma_fence_get(fence);
+ }
+ }
+
+ if (num_points) {
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence;
+ struct dma_fence *f;
+
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ dma_fence_unwrap_for_each(f, &iter, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ dma_fence_get(f);
+ fences[num_fences++] = f;
+ }
+
+ dma_fence_put(fence);
+ }
+ }
+
+ /* Retrieve syncobj's fence */
+ for (i = 0; i < num_syncobj; i++) {
+ struct dma_fence *fence;
+
+ r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+ 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ }
+
+ /*
+ * Keep only the latest fences to reduce the number of values
+ * given back to userspace.
+ */
+ num_fences = dma_fence_dedup_array(fences, num_fences);
+
+ waitq = xa_load(&userq_mgr->userq_mgr_xa, wait_info->waitq_id);
+ if (!waitq) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ for (i = 0, cnt = 0; i < num_fences; i++) {
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence *userq_fence;
+ u32 index;
+
+ userq_fence = to_amdgpu_userq_fence(fences[i]);
+ if (!userq_fence) {
+ /*
+ * Just waiting on other driver fences should
+ * be good for now
+ */
+ r = dma_fence_wait(fences[i], true);
+ if (r) {
+ dma_fence_put(fences[i]);
+ goto free_fences;
+ }
+
+ dma_fence_put(fences[i]);
+ continue;
+ }
+
+ fence_drv = userq_fence->fence_drv;
+ /*
+ * We need to make sure the user queue releases its references
+ * to the fence drivers at some point before queue destruction.
+ * Otherwise, those references would accumulate until we run out
+ * of space and crash.
+ */
+ r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
+ xa_limit_32b, GFP_KERNEL);
+ if (r)
+ goto free_fences;
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+
+ /* Store drm syncobj's gpu va address and value */
+ fence_info[cnt].va = fence_drv->va;
+ fence_info[cnt].value = fences[i]->seqno;
+
+ dma_fence_put(fences[i]);
+ /* Increment the actual userq fence count */
+ cnt++;
+ }
+
+ wait_info->num_fences = cnt;
+ /* Copy userq fence info to user space */
+ if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
+ fence_info, wait_info->num_fences * sizeof(*fence_info))) {
+ r = -EFAULT;
+ goto free_fences;
+ }
+
+ kfree(fences);
+ kfree(fence_info);
+ }
+
+ drm_exec_fini(&exec);
+ for (i = 0; i < num_read_bo_handles; i++)
+ drm_gem_object_put(gobj_read[i]);
+ kfree(gobj_read);
+
+ for (i = 0; i < num_write_bo_handles; i++)
+ drm_gem_object_put(gobj_write[i]);
+ kfree(gobj_write);
+
+ kfree(timeline_points);
+ kfree(timeline_handles);
+ kfree(syncobj_handles);
+ kfree(bo_handles_write);
+ kfree(bo_handles_read);
+
+ return 0;
+
+free_fences:
+ while (num_fences-- > 0)
+ dma_fence_put(fences[num_fences]);
+ kfree(fences);
+free_fence_info:
+ kfree(fence_info);
+exec_fini:
+ drm_exec_fini(&exec);
+put_gobj_write:
+ while (wentry-- > 0)
+ drm_gem_object_put(gobj_write[wentry]);
+ kfree(gobj_write);
+put_gobj_read:
+ while (rentry-- > 0)
+ drm_gem_object_put(gobj_read[rentry]);
+ kfree(gobj_read);
+free_timeline_points:
+ kfree(timeline_points);
+free_timeline_handles:
+ kfree(timeline_handles);
+free_syncobj_handles:
+ kfree(syncobj_handles);
+free_bo_handles_write:
+ kfree(bo_handles_write);
+free_bo_handles_read:
+ kfree(bo_handles_read);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
new file mode 100644
index 000000000000..d76add2afc77
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_USERQ_FENCE_H__
+#define __AMDGPU_USERQ_FENCE_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_userq.h"
+
+struct amdgpu_userq_fence {
+ struct dma_fence base;
+ /*
+ * This lock is necessary to synchronize the
+ * userqueue dma fence operations.
+ */
+ spinlock_t lock;
+ struct list_head link;
+ unsigned long fence_drv_array_count;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence_driver **fence_drv_array;
+};
+
+struct amdgpu_userq_fence_driver {
+ struct kref refcount;
+ u64 va;
+ u64 gpu_addr;
+ u64 *cpu_addr;
+ u64 context;
+ /*
+	 * This lock is necessary to synchronize the access
+ * to the fences list by the fence driver.
+ */
+ spinlock_t fence_list_lock;
+ struct list_head fences;
+ struct amdgpu_device *adev;
+ char timeline_name[TASK_COMM_LEN];
+};
+
+int amdgpu_userq_fence_slab_init(void);
+void amdgpu_userq_fence_slab_fini(void);
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_force_completion(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_destroy(struct kref *ref);
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif
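
The wait path above takes one amdgpu_userq_fence_driver reference per deduplicated fence and stashes it in the wait queue's fence_drv_xa. A plausible sketch of the balancing release a queue performs before destruction (the drain helper name is hypothetical; only the xarray walk and the put are implied by the code above):

/* Hypothetical drain helper: drop the references stashed in fence_drv_xa. */
static void userq_fence_drv_xa_drain(struct xarray *fence_drv_xa)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long index;

	/* Drop the reference taken when each entry was stored. */
	xa_for_each(fence_drv_xa, index, fence_drv) {
		xa_erase(fence_drv_xa, index);
		amdgpu_userq_fence_driver_put(fence_drv);
	}
}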
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
new file mode 100644
index 000000000000..1e40ca3b1584
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_utils.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_UTILS_H_
+#define AMDGPU_UTILS_H_
+
+/* ---------- Generic 2-bit capability attribute encoding ----------
+ * 00 INVALID, 01 RO, 10 WO, 11 RW
+ */
+enum amdgpu_cap_attr {
+ AMDGPU_CAP_ATTR_INVALID = 0,
+ AMDGPU_CAP_ATTR_RO = 1 << 0,
+ AMDGPU_CAP_ATTR_WO = 1 << 1,
+ AMDGPU_CAP_ATTR_RW = (AMDGPU_CAP_ATTR_RO | AMDGPU_CAP_ATTR_WO),
+};
+
+#define AMDGPU_CAP_ATTR_BITS 2
+#define AMDGPU_CAP_ATTR_MAX ((1U << AMDGPU_CAP_ATTR_BITS) - 1)
+
+/* Internal macro that generates the capability helpers for a given class NAME */
+#define DECLARE_ATTR_CAP_CLASS_HELPERS(NAME) \
+enum { NAME##_BITMAP_BITS = NAME##_COUNT * AMDGPU_CAP_ATTR_BITS }; \
+struct NAME##_caps { \
+ DECLARE_BITMAP(bmap, NAME##_BITMAP_BITS); \
+}; \
+static inline unsigned int NAME##_ATTR_START(enum NAME##_cap_id cap) \
+{ return (unsigned int)cap * AMDGPU_CAP_ATTR_BITS; } \
+static inline void NAME##_attr_init(struct NAME##_caps *c) \
+{ if (c) bitmap_zero(c->bmap, NAME##_BITMAP_BITS); } \
+static inline int NAME##_attr_set(struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr attr) \
+{ \
+ if (!c) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ if ((unsigned int)attr > AMDGPU_CAP_ATTR_MAX) \
+ return -EINVAL; \
+ bitmap_write(c->bmap, (unsigned long)attr, \
+ NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ return 0; \
+} \
+static inline int NAME##_attr_get(const struct NAME##_caps *c, \
+ enum NAME##_cap_id cap, enum amdgpu_cap_attr *out) \
+{ \
+ unsigned long v; \
+ if (!c || !out) \
+ return -EINVAL; \
+ if (cap >= NAME##_COUNT) \
+ return -EINVAL; \
+ v = bitmap_read(c->bmap, NAME##_ATTR_START(cap), AMDGPU_CAP_ATTR_BITS); \
+ *out = (enum amdgpu_cap_attr)v; \
+ return 0; \
+} \
+static inline bool NAME##_cap_is_ro(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RO; } \
+static inline bool NAME##_cap_is_wo(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_WO; } \
+static inline bool NAME##_cap_is_rw(const struct NAME##_caps *c, enum NAME##_cap_id id) \
+{ enum amdgpu_cap_attr a; return !NAME##_attr_get(c, id, &a) && a == AMDGPU_CAP_ATTR_RW; }
+
+/* Element expander for enum creation */
+#define _CAP_ENUM_ELEM(x) x,
+
+/* Public macro: declare enum + helpers from an X-macro list */
+#define DECLARE_ATTR_CAP_CLASS(NAME, LIST_MACRO) \
+ enum NAME##_cap_id { LIST_MACRO(_CAP_ENUM_ELEM) NAME##_COUNT }; \
+ DECLARE_ATTR_CAP_CLASS_HELPERS(NAME)
+
+#endif /* AMDGPU_UTILS_H_ */
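
To illustrate how DECLARE_ATTR_CAP_CLASS() is meant to be consumed, a minimal sketch follows; the AMDGPU_DEMO_* names are hypothetical and exist only for the example.

/* Hypothetical capability list: each X(...) entry becomes an enum value. */
#define AMDGPU_DEMO_CAPS(X) \
	X(AMDGPU_DEMO_CAP_FOO) \
	X(AMDGPU_DEMO_CAP_BAR)

/* Generates enum amdgpu_demo_cap_id, struct amdgpu_demo_caps and the helpers. */
DECLARE_ATTR_CAP_CLASS(amdgpu_demo, AMDGPU_DEMO_CAPS);

static void amdgpu_demo_caps_example(void)
{
	struct amdgpu_demo_caps caps;

	amdgpu_demo_attr_init(&caps);
	amdgpu_demo_attr_set(&caps, AMDGPU_DEMO_CAP_FOO, AMDGPU_CAP_ATTR_RW);

	if (amdgpu_demo_cap_is_rw(&caps, AMDGPU_DEMO_CAP_FOO))
		pr_debug("FOO is RW; BAR stays AMDGPU_CAP_ATTR_INVALID\n");
}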
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
new file mode 100644
index 000000000000..5c38f0d30c87
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -0,0 +1,1383 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <deathsimple@vodafone.de>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include <drm/drm.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_cs.h"
+#include "cikd.h"
+#include "uvd/uvd_4_2_d.h"
+
+#include "amdgpu_ras.h"
+
+/* 1 second timeout */
+#define UVD_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+/* Firmware versions for VI */
+#define FW_1_65_10 ((1 << 24) | (65 << 16) | (10 << 8))
+#define FW_1_87_11 ((1 << 24) | (87 << 16) | (11 << 8))
+#define FW_1_87_12 ((1 << 24) | (87 << 16) | (12 << 8))
+#define FW_1_37_15 ((1 << 24) | (37 << 16) | (15 << 8))
+
+/* Polaris10/11 firmware version */
+#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+
+/* Firmware Names */
+#ifdef CONFIG_DRM_AMDGPU_SI
+#define FIRMWARE_TAHITI "amdgpu/tahiti_uvd.bin"
+#define FIRMWARE_VERDE "amdgpu/verde_uvd.bin"
+#define FIRMWARE_PITCAIRN "amdgpu/pitcairn_uvd.bin"
+#define FIRMWARE_OLAND "amdgpu/oland_uvd.bin"
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin"
+#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
+#define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin"
+#define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin"
+#define FIRMWARE_MULLINS "amdgpu/mullins_uvd.bin"
+#endif
+#define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
+#define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
+#define FIRMWARE_FIJI "amdgpu/fiji_uvd.bin"
+#define FIRMWARE_STONEY "amdgpu/stoney_uvd.bin"
+#define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin"
+#define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin"
+#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
+#define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin"
+
+#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
+#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
+#define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin"
+
+/* These are common relative offsets for all asics, from uvd_7_0_offset.h, */
+#define UVD_GPCOM_VCPU_CMD 0x03c3
+#define UVD_GPCOM_VCPU_DATA0 0x03c4
+#define UVD_GPCOM_VCPU_DATA1 0x03c5
+#define UVD_NO_OP 0x03ff
+#define UVD_BASE_SI 0x3800
+
+/*
+ * amdgpu_uvd_cs_ctx - Command submission parser context
+ *
+ * Used for emulating virtual memory support on UVD 4.2.
+ */
+struct amdgpu_uvd_cs_ctx {
+ struct amdgpu_cs_parser *parser;
+ unsigned int reg, count;
+ unsigned int data0, data1;
+ unsigned int idx;
+ struct amdgpu_ib *ib;
+
+	/* does the IB have a msg command */
+ bool has_msg_cmd;
+
+ /* minimum buffer sizes */
+ unsigned int *buf_sizes;
+};
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
+MODULE_FIRMWARE(FIRMWARE_VERDE);
+MODULE_FIRMWARE(FIRMWARE_PITCAIRN);
+MODULE_FIRMWARE(FIRMWARE_OLAND);
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+MODULE_FIRMWARE(FIRMWARE_BONAIRE);
+MODULE_FIRMWARE(FIRMWARE_KABINI);
+MODULE_FIRMWARE(FIRMWARE_KAVERI);
+MODULE_FIRMWARE(FIRMWARE_HAWAII);
+MODULE_FIRMWARE(FIRMWARE_MULLINS);
+#endif
+MODULE_FIRMWARE(FIRMWARE_TONGA);
+MODULE_FIRMWARE(FIRMWARE_CARRIZO);
+MODULE_FIRMWARE(FIRMWARE_FIJI);
+MODULE_FIRMWARE(FIRMWARE_STONEY);
+MODULE_FIRMWARE(FIRMWARE_POLARIS10);
+MODULE_FIRMWARE(FIRMWARE_POLARIS11);
+MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
+
+MODULE_FIRMWARE(FIRMWARE_VEGA10);
+MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
+
+static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
+static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
+
+static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
+ uint32_t size,
+ struct amdgpu_bo **bo_ptr)
+{
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_bo *bo = NULL;
+ void *addr;
+ int r;
+
+ r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo, NULL, &addr);
+ if (r)
+ return r;
+
+ if (adev->uvd.address_64_bit)
+ goto succ;
+
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_uvd_force_into_uvd_segment(bo);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ goto err;
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r)
+ goto err_pin;
+ r = amdgpu_bo_kmap(bo, &addr);
+ if (r)
+ goto err_kmap;
+succ:
+ amdgpu_bo_unreserve(bo);
+ *bo_ptr = bo;
+ return 0;
+err_kmap:
+ amdgpu_bo_unpin(bo);
+err_pin:
+err:
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+ return r;
+}
+
+int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
+{
+ unsigned long bo_size;
+ const char *fw_name;
+ const struct common_firmware_header *hdr;
+ unsigned int family_id;
+ int i, j, r;
+
+ INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
+
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ fw_name = FIRMWARE_TAHITI;
+ break;
+ case CHIP_VERDE:
+ fw_name = FIRMWARE_VERDE;
+ break;
+ case CHIP_PITCAIRN:
+ fw_name = FIRMWARE_PITCAIRN;
+ break;
+ case CHIP_OLAND:
+ fw_name = FIRMWARE_OLAND;
+ break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ fw_name = FIRMWARE_BONAIRE;
+ break;
+ case CHIP_KABINI:
+ fw_name = FIRMWARE_KABINI;
+ break;
+ case CHIP_KAVERI:
+ fw_name = FIRMWARE_KAVERI;
+ break;
+ case CHIP_HAWAII:
+ fw_name = FIRMWARE_HAWAII;
+ break;
+ case CHIP_MULLINS:
+ fw_name = FIRMWARE_MULLINS;
+ break;
+#endif
+ case CHIP_TONGA:
+ fw_name = FIRMWARE_TONGA;
+ break;
+ case CHIP_FIJI:
+ fw_name = FIRMWARE_FIJI;
+ break;
+ case CHIP_CARRIZO:
+ fw_name = FIRMWARE_CARRIZO;
+ break;
+ case CHIP_STONEY:
+ fw_name = FIRMWARE_STONEY;
+ break;
+ case CHIP_POLARIS10:
+ fw_name = FIRMWARE_POLARIS10;
+ break;
+ case CHIP_POLARIS11:
+ fw_name = FIRMWARE_POLARIS11;
+ break;
+ case CHIP_POLARIS12:
+ fw_name = FIRMWARE_POLARIS12;
+ break;
+ case CHIP_VEGA10:
+ fw_name = FIRMWARE_VEGA10;
+ break;
+ case CHIP_VEGA12:
+ fw_name = FIRMWARE_VEGA12;
+ break;
+ case CHIP_VEGAM:
+ fw_name = FIRMWARE_VEGAM;
+ break;
+ case CHIP_VEGA20:
+ fw_name = FIRMWARE_VEGA20;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
+ fw_name);
+ amdgpu_ucode_release(&adev->uvd.fw);
+ return r;
+ }
+
+	/* Set the default number of UVD handles that the firmware can support */
+ adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES;
+
+ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+ family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
+
+ if (adev->asic_type < CHIP_VEGA20) {
+ unsigned int version_major, version_minor;
+
+ version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+ version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
+ version_major, version_minor, family_id);
+
+		/*
+		 * Limit the number of UVD handles depending on the microcode
+		 * major and minor versions. Firmware version 1.80 is the first
+		 * to support 40 UVD handles, and all subsequent versions keep
+		 * that support.
+		 */
+ if ((version_major > 0x01) ||
+ ((version_major == 0x01) && (version_minor >= 0x50)))
+ adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
+
+ adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
+ (family_id << 8));
+
+ if ((adev->asic_type == CHIP_POLARIS10 ||
+ adev->asic_type == CHIP_POLARIS11) &&
+ (adev->uvd.fw_version < FW_1_66_16))
+ DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too old.\n",
+ version_major, version_minor);
+ } else {
+ unsigned int enc_major, enc_minor, dec_minor;
+
+ dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
+ enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
+ DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n",
+ enc_major, enc_minor, dec_minor, family_id);
+
+ adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
+
+ adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
+ }
+
+ bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr,
+ &adev->uvd.inst[j].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
+ return r;
+ }
+ }
+
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ atomic_set(&adev->uvd.handles[i], 0);
+ adev->uvd.filp[i] = NULL;
+ }
+
+	/* from UVD v5.0, HW addressing capacity increased to 64 bits */
+ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
+ adev->uvd.address_64_bit = true;
+
+ r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo);
+ if (r)
+ return r;
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
+ break;
+ case CHIP_CARRIZO:
+ adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
+ break;
+ case CHIP_FIJI:
+ adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
+ break;
+ case CHIP_STONEY:
+ adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
+ break;
+ default:
+ adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
+ }
+
+ return 0;
+}
+
+int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
+{
+ void *addr = amdgpu_bo_kptr(adev->uvd.ib_bo);
+ int i, j;
+
+ drm_sched_entity_destroy(&adev->uvd.entity);
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ kvfree(adev->uvd.inst[j].saved_bo);
+
+ amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
+ &adev->uvd.inst[j].gpu_addr,
+ (void **)&adev->uvd.inst[j].cpu_addr);
+
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring);
+
+ for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
+ amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+ }
+ amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, &addr);
+ amdgpu_ucode_release(&adev->uvd.fw);
+
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_entity_init - init entity
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
+ *
+ * Initialize the entity used for handle management in the kernel driver.
+ */
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (ring == &adev->uvd.inst[0].ring) {
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ int r;
+
+ r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up UVD kernel entity.\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev)
+{
+ unsigned int size;
+ void *ptr;
+ int i, j, idx;
+
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
+
+ /* only valid for physical mode */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ if (atomic_read(&adev->uvd.handles[i]))
+ break;
+
+ if (i == adev->uvd.max_handles)
+ return 0;
+ }
+
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ if (adev->uvd.inst[j].vcpu_bo == NULL)
+ continue;
+
+ size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
+ ptr = adev->uvd.inst[j].cpu_addr;
+
+ adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->uvd.inst[j].saved_bo)
+ return -ENOMEM;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ /* re-write 0 since err_event_athub will corrupt VCPU buffer */
+ if (amdgpu_ras_intr_triggered())
+ memset(adev->uvd.inst[j].saved_bo, 0, size);
+ else
+ memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
+
+ drm_dev_exit(idx);
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_uvd_suspend(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_intr_triggered())
+		DRM_WARN("UVD VCPU state may be lost due to RAS ERREVENT_ATHUB_INTERRUPT\n");
+
+ return 0;
+}
+
+int amdgpu_uvd_resume(struct amdgpu_device *adev)
+{
+ unsigned int size;
+ void *ptr;
+ int i, idx;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ if (adev->uvd.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
+ ptr = adev->uvd.inst[i].cpu_addr;
+
+ if (adev->uvd.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->uvd.inst[i].saved_bo);
+ adev->uvd.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
+
+ hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ drm_dev_exit(idx);
+ }
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
+ memset_io(ptr, 0, size);
+ /* to restore uvd fence seq */
+ amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
+ }
+ }
+ return 0;
+}
+
+void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
+{
+ struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
+ int i, r;
+
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ uint32_t handle = atomic_read(&adev->uvd.handles[i]);
+
+ if (handle != 0 && adev->uvd.filp[i] == filp) {
+ struct dma_fence *fence;
+
+ r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
+ &fence);
+ if (r) {
+ DRM_ERROR("Error destroying UVD %d!\n", r);
+ continue;
+ }
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ adev->uvd.filp[i] = NULL;
+ atomic_set(&adev->uvd.handles[i], 0);
+ }
+ }
+}
+
+static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
+{
+ int i;
+
+ for (i = 0; i < abo->placement.num_placement; ++i) {
+ abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
+ abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+ if (abo->placements[i].mem_type == TTM_PL_VRAM)
+ abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
+ }
+}
+
+static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx)
+{
+ uint32_t lo, hi;
+ uint64_t addr;
+
+ lo = amdgpu_ib_get_value(ctx->ib, ctx->data0);
+ hi = amdgpu_ib_get_value(ctx->ib, ctx->data1);
+ addr = ((uint64_t)lo) | (((uint64_t)hi) << 32);
+
+ return addr;
+}
+
+/**
+ * amdgpu_uvd_cs_pass1 - first parsing round
+ *
+ * @ctx: UVD parser context
+ *
+ * Make sure UVD message and feedback buffers are in VRAM and
+ * nobody is violating a 256MB boundary.
+ */
+static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
+{
+ struct ttm_operation_ctx tctx = { false, false };
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ uint32_t cmd;
+ uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
+ int r = 0;
+
+ r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+ return r;
+ }
+
+ if (!ctx->parser->adev->uvd.address_64_bit) {
+ /* check if it's a message or feedback command */
+ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
+ if (cmd == 0x0 || cmd == 0x3) {
+ /* yes, force it into VRAM */
+ uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ amdgpu_bo_placement_from_domain(bo, domain);
+ }
+ amdgpu_uvd_force_into_uvd_segment(bo);
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx);
+ }
+
+ return r;
+}
+
+/**
+ * amdgpu_uvd_cs_msg_decode - handle UVD decode message
+ *
+ * @adev: amdgpu_device pointer
+ * @msg: pointer to message structure
+ * @buf_sizes: placeholder to put the different buffer lengths
+ *
+ * Peek into the decode message and calculate the necessary buffer sizes.
+ */
+static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
+ unsigned int buf_sizes[])
+{
+ unsigned int stream_type = msg[4];
+ unsigned int width = msg[6];
+ unsigned int height = msg[7];
+ unsigned int dpb_size = msg[9];
+ unsigned int pitch = msg[28];
+ unsigned int level = msg[57];
+
+ unsigned int width_in_mb = width / 16;
+ unsigned int height_in_mb = ALIGN(height / 16, 2);
+ unsigned int fs_in_mb = width_in_mb * height_in_mb;
+
+ unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer;
+ unsigned int min_ctx_size = ~0;
+
+ image_size = width * height;
+ image_size += image_size / 2;
+ image_size = ALIGN(image_size, 1024);
+
+ switch (stream_type) {
+ case 0: /* H264 */
+ switch (level) {
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+ break;
+ case 31:
+ num_dpb_buffer = 18000 / fs_in_mb;
+ break;
+ case 32:
+ num_dpb_buffer = 20480 / fs_in_mb;
+ break;
+ case 41:
+ num_dpb_buffer = 32768 / fs_in_mb;
+ break;
+ case 42:
+ num_dpb_buffer = 34816 / fs_in_mb;
+ break;
+ case 50:
+ num_dpb_buffer = 110400 / fs_in_mb;
+ break;
+ case 51:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ default:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ }
+ num_dpb_buffer++;
+ if (num_dpb_buffer > 17)
+ num_dpb_buffer = 17;
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * num_dpb_buffer;
+
+ /* macroblock context buffer */
+ min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192;
+
+ /* IT surface buffer */
+ min_dpb_size += width_in_mb * height_in_mb * 32;
+ break;
+
+ case 1: /* VC1 */
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * 3;
+
+ /* CONTEXT_BUFFER */
+ min_dpb_size += width_in_mb * height_in_mb * 128;
+
+ /* IT surface buffer */
+ min_dpb_size += width_in_mb * 64;
+
+ /* DB surface buffer */
+ min_dpb_size += width_in_mb * 128;
+
+ /* BP */
+ tmp = max(width_in_mb, height_in_mb);
+ min_dpb_size += ALIGN(tmp * 7 * 16, 64);
+ break;
+
+ case 3: /* MPEG2 */
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * 3;
+ break;
+
+ case 4: /* MPEG4 */
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * 3;
+
+ /* CM */
+ min_dpb_size += width_in_mb * height_in_mb * 64;
+
+ /* IT surface buffer */
+ min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
+ break;
+
+ case 7: /* H264 Perf */
+ switch (level) {
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+ break;
+ case 31:
+ num_dpb_buffer = 18000 / fs_in_mb;
+ break;
+ case 32:
+ num_dpb_buffer = 20480 / fs_in_mb;
+ break;
+ case 41:
+ num_dpb_buffer = 32768 / fs_in_mb;
+ break;
+ case 42:
+ num_dpb_buffer = 34816 / fs_in_mb;
+ break;
+ case 50:
+ num_dpb_buffer = 110400 / fs_in_mb;
+ break;
+ case 51:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ default:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ }
+ num_dpb_buffer++;
+ if (num_dpb_buffer > 17)
+ num_dpb_buffer = 17;
+
+ /* reference picture buffer */
+ min_dpb_size = image_size * num_dpb_buffer;
+
+ if (!adev->uvd.use_ctx_buf) {
+ /* macroblock context buffer */
+ min_dpb_size +=
+ width_in_mb * height_in_mb * num_dpb_buffer * 192;
+
+ /* IT surface buffer */
+ min_dpb_size += width_in_mb * height_in_mb * 32;
+ } else {
+ /* macroblock context buffer */
+ min_ctx_size =
+ width_in_mb * height_in_mb * num_dpb_buffer * 192;
+ }
+ break;
+
+ case 8: /* MJPEG */
+ min_dpb_size = 0;
+ break;
+
+ case 16: /* H265 */
+ image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2;
+ image_size = ALIGN(image_size, 256);
+
+ num_dpb_buffer = (le32_to_cpu(msg[59]) & 0xff) + 2;
+ min_dpb_size = image_size * num_dpb_buffer;
+ min_ctx_size = ((width + 255) / 16) * ((height + 255) / 16)
+ * 16 * num_dpb_buffer + 52 * 1024;
+ break;
+
+ default:
+ DRM_ERROR("UVD codec not handled %d!\n", stream_type);
+ return -EINVAL;
+ }
+
+ if (width > pitch) {
+ DRM_ERROR("Invalid UVD decoding target pitch!\n");
+ return -EINVAL;
+ }
+
+ if (dpb_size < min_dpb_size) {
+ DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
+ dpb_size, min_dpb_size);
+ return -EINVAL;
+ }
+
+ buf_sizes[0x1] = dpb_size;
+ buf_sizes[0x2] = image_size;
+ buf_sizes[0x4] = min_ctx_size;
+ /* store image width to adjust nb memory pstate */
+ adev->uvd.decode_image_width = width;
+ return 0;
+}
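
As a worked example of the H.264 branch above (values assumed for illustration): a 1920x1088 level 4.1 stream gives width_in_mb = 120, height_in_mb = 68 and fs_in_mb = 8160, so num_dpb_buffer = 32768 / 8160 + 1 = 5. image_size aligns to 3133440 bytes, and the minimum DPB size works out to 5 * 3133440 + 120 * 68 * 5 * 192 + 120 * 68 * 32 = 23761920 bytes, roughly 22.7 MiB.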
+
+/**
+ * amdgpu_uvd_cs_msg - handle UVD message
+ *
+ * @ctx: UVD parser context
+ * @bo: buffer object containing the message
+ * @offset: offset into the buffer object
+ *
+ * Peek into the UVD message and extract the session id.
+ * Make sure that we don't open up too many sessions.
+ */
+static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
+ struct amdgpu_bo *bo, unsigned int offset)
+{
+ struct amdgpu_device *adev = ctx->parser->adev;
+ int32_t *msg, msg_type, handle;
+ void *ptr;
+ long r;
+ int i;
+
+ if (offset & 0x3F) {
+ DRM_ERROR("UVD messages must be 64 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_kmap(bo, &ptr);
+ if (r) {
+		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
+ return r;
+ }
+
+ msg = ptr + offset;
+
+ msg_type = msg[1];
+ handle = msg[2];
+
+ if (handle == 0) {
+ amdgpu_bo_kunmap(bo);
+ DRM_ERROR("Invalid UVD handle!\n");
+ return -EINVAL;
+ }
+
+ switch (msg_type) {
+ case 0:
+ /* it's a create msg, calc image size (width * height) */
+ amdgpu_bo_kunmap(bo);
+
+ /* try to alloc a new handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ if (atomic_read(&adev->uvd.handles[i]) == handle) {
+				DRM_ERROR("Handle 0x%x already in use!\n",
+ handle);
+ return -EINVAL;
+ }
+
+ if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
+ adev->uvd.filp[i] = ctx->parser->filp;
+ return 0;
+ }
+ }
+
+ DRM_ERROR("No more free UVD handles!\n");
+ return -ENOSPC;
+
+ case 1:
+ /* it's a decode msg, calc buffer sizes */
+ r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes);
+ amdgpu_bo_kunmap(bo);
+ if (r)
+ return r;
+
+ /* validate the handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ if (atomic_read(&adev->uvd.handles[i]) == handle) {
+ if (adev->uvd.filp[i] != ctx->parser->filp) {
+ DRM_ERROR("UVD handle collision detected!\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+ }
+
+ DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
+ return -ENOENT;
+
+ case 2:
+ /* it's a destroy msg, free the handle */
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
+ amdgpu_bo_kunmap(bo);
+ return 0;
+
+ default:
+ DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+ }
+
+ amdgpu_bo_kunmap(bo);
+ return -EINVAL;
+}
+
+/**
+ * amdgpu_uvd_cs_pass2 - second parsing round
+ *
+ * @ctx: UVD parser context
+ *
+ * Patch buffer addresses, make sure buffer sizes are correct.
+ */
+static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ uint32_t cmd;
+ uint64_t start, end;
+ uint64_t addr = amdgpu_uvd_get_addr_from_ctx(ctx);
+ int r;
+
+ r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+ return r;
+ }
+
+ start = amdgpu_bo_gpu_offset(bo);
+
+ end = (mapping->last + 1 - mapping->start);
+ end = end * AMDGPU_GPU_PAGE_SIZE + start;
+
+ addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ start += addr;
+
+ amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start));
+ amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start));
+
+ cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1;
+ if (cmd < 0x4) {
+ if ((end - start) < ctx->buf_sizes[cmd]) {
+			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
+ (unsigned int)(end - start),
+ ctx->buf_sizes[cmd]);
+ return -EINVAL;
+ }
+
+ } else if (cmd == 0x206) {
+ if ((end - start) < ctx->buf_sizes[4]) {
+			DRM_ERROR("buffer (%d) too small (%d / %d)!\n", cmd,
+ (unsigned int)(end - start),
+ ctx->buf_sizes[4]);
+ return -EINVAL;
+ }
+ } else if ((cmd != 0x100) && (cmd != 0x204)) {
+ DRM_ERROR("invalid UVD command %X!\n", cmd);
+ return -EINVAL;
+ }
+
+ if (!ctx->parser->adev->uvd.address_64_bit) {
+ if ((start >> 28) != ((end - 1) >> 28)) {
+ DRM_ERROR("reloc %llx-%llx crossing 256MB boundary!\n",
+ start, end);
+ return -EINVAL;
+ }
+
+ if ((cmd == 0 || cmd == 0x3) &&
+ (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
+ DRM_ERROR("msg/fb buffer %llx-%llx out of 256MB segment!\n",
+ start, end);
+ return -EINVAL;
+ }
+ }
+
+ if (cmd == 0) {
+ ctx->has_msg_cmd = true;
+ r = amdgpu_uvd_cs_msg(ctx, bo, addr);
+ if (r)
+ return r;
+ } else if (!ctx->has_msg_cmd) {
+		DRM_ERROR("Message needed before other commands are sent!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_cs_reg - parse register writes
+ *
+ * @ctx: UVD parser context
+ * @cb: callback function
+ *
+ * Parse the register writes, call cb on each complete command.
+ */
+static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
+ int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
+{
+ int i, r;
+
+ ctx->idx++;
+ for (i = 0; i <= ctx->count; ++i) {
+ unsigned int reg = ctx->reg + i;
+
+ if (ctx->idx >= ctx->ib->length_dw) {
+ DRM_ERROR("Register command after end of CS!\n");
+ return -EINVAL;
+ }
+
+ switch (reg) {
+ case mmUVD_GPCOM_VCPU_DATA0:
+ ctx->data0 = ctx->idx;
+ break;
+ case mmUVD_GPCOM_VCPU_DATA1:
+ ctx->data1 = ctx->idx;
+ break;
+ case mmUVD_GPCOM_VCPU_CMD:
+ r = cb(ctx);
+ if (r)
+ return r;
+ break;
+ case mmUVD_ENGINE_CNTL:
+ case mmUVD_NO_OP:
+ break;
+ default:
+ DRM_ERROR("Invalid reg 0x%X!\n", reg);
+ return -EINVAL;
+ }
+ ctx->idx++;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_cs_packets - parse UVD packets
+ *
+ * @ctx: UVD parser context
+ * @cb: callback function
+ *
+ * Parse the command stream packets.
+ */
+static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
+ int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
+{
+ int r;
+
+ for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) {
+ uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx);
+ unsigned int type = CP_PACKET_GET_TYPE(cmd);
+
+ switch (type) {
+ case PACKET_TYPE0:
+ ctx->reg = CP_PACKET0_GET_REG(cmd);
+ ctx->count = CP_PACKET_GET_COUNT(cmd);
+ r = amdgpu_uvd_cs_reg(ctx, cb);
+ if (r)
+ return r;
+ break;
+ case PACKET_TYPE2:
+ ++ctx->idx;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d !\n", type);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_uvd_ring_parse_cs - UVD command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to patch
+ *
+ * Parse the command stream, patch in addresses as necessary.
+ */
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_uvd_cs_ctx ctx = {};
+ unsigned int buf_sizes[] = {
+ [0x00000000] = 2048,
+ [0x00000001] = 0xFFFFFFFF,
+ [0x00000002] = 0xFFFFFFFF,
+ [0x00000003] = 2048,
+ [0x00000004] = 0xFFFFFFFF,
+ };
+ int r;
+
+ job->vm = NULL;
+
+ if (ib->length_dw % 16) {
+ DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
+ ib->length_dw);
+ return -EINVAL;
+ }
+
+ ctx.parser = parser;
+ ctx.buf_sizes = buf_sizes;
+ ctx.ib = ib;
+
+ /* first round only required on chips without UVD 64 bit address support */
+ if (!parser->adev->uvd.address_64_bit) {
+ /* first round, make sure the buffers are actually in the UVD segment */
+ r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass1);
+ if (r)
+ return r;
+ }
+
+ /* second round, patch buffer addresses into the command stream */
+ r = amdgpu_uvd_cs_packets(&ctx, amdgpu_uvd_cs_pass2);
+ if (r)
+ return r;
+
+ if (!ctx.has_msg_cmd) {
+ DRM_ERROR("UVD-IBs need a msg command!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+ bool direct, struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
+ uint32_t offset, data[4];
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ uint64_t addr;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ 64, direct ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
+ if (r)
+ return r;
+
+ if (adev->asic_type >= CHIP_VEGA10)
+ offset = adev->reg_offset[UVD_HWIP][ring->me][1];
+ else
+ offset = UVD_BASE_SI;
+
+ data[0] = PACKET0(offset + UVD_GPCOM_VCPU_DATA0, 0);
+ data[1] = PACKET0(offset + UVD_GPCOM_VCPU_DATA1, 0);
+ data[2] = PACKET0(offset + UVD_GPCOM_VCPU_CMD, 0);
+ data[3] = PACKET0(offset + UVD_NO_OP, 0);
+
+ ib = &job->ibs[0];
+ addr = amdgpu_bo_gpu_offset(bo);
+ ib->ptr[0] = data[0];
+ ib->ptr[1] = addr;
+ ib->ptr[2] = data[1];
+ ib->ptr[3] = addr >> 32;
+ ib->ptr[4] = data[2];
+ ib->ptr[5] = 0;
+ for (i = 6; i < 16; i += 2) {
+ ib->ptr[i] = data[3];
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ if (direct) {
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err_free;
+ } else {
+ r = drm_sched_job_add_resv_dependencies(&job->base,
+ bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL);
+ if (r)
+ goto err_free;
+
+ f = amdgpu_job_submit(job);
+ }
+
+ amdgpu_bo_reserve(bo, true);
+ amdgpu_bo_fence(bo, f, false);
+ amdgpu_bo_unreserve(bo);
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err_free:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/* multiple fence commands without any stream commands in between can
+ * crash the vcpu, so just try to emit a dummy create/destroy msg to
+ * avoid this
+ */
+int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_bo *bo = adev->uvd.ib_bo;
+ uint32_t *msg;
+ int i;
+
+ msg = amdgpu_bo_kptr(bo);
+	/* stitch together a UVD create msg */
+ msg[0] = cpu_to_le32(0x00000de4);
+ msg[1] = cpu_to_le32(0x00000000);
+ msg[2] = cpu_to_le32(handle);
+ msg[3] = cpu_to_le32(0x00000000);
+ msg[4] = cpu_to_le32(0x00000000);
+ msg[5] = cpu_to_le32(0x00000000);
+ msg[6] = cpu_to_le32(0x00000000);
+ msg[7] = cpu_to_le32(0x00000780);
+ msg[8] = cpu_to_le32(0x00000440);
+ msg[9] = cpu_to_le32(0x00000000);
+ msg[10] = cpu_to_le32(0x01b37000);
+ for (i = 11; i < 1024; ++i)
+ msg[i] = cpu_to_le32(0x0);
+
+ return amdgpu_uvd_send_msg(ring, bo, true, fence);
+
+}
+
+int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_bo *bo = NULL;
+ uint32_t *msg;
+ int r, i;
+
+ if (direct) {
+ bo = adev->uvd.ib_bo;
+ } else {
+ r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo);
+ if (r)
+ return r;
+ }
+
+ msg = amdgpu_bo_kptr(bo);
+	/* stitch together a UVD destroy msg */
+ msg[0] = cpu_to_le32(0x00000de4);
+ msg[1] = cpu_to_le32(0x00000002);
+ msg[2] = cpu_to_le32(handle);
+ msg[3] = cpu_to_le32(0x00000000);
+ for (i = 4; i < 1024; ++i)
+ msg[i] = cpu_to_le32(0x0);
+
+ r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
+
+ if (!direct)
+ amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
+
+ return r;
+}
+
+static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, uvd.idle_work.work);
+ unsigned int fences = 0, i, j;
+
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+ fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
+ for (j = 0; j < adev->uvd.num_enc_rings; ++j)
+ fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
+ }
+
+ if (fences == 0) {
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, false);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+ /* shutdown the UVD block */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_GATE);
+ }
+ } else {
+ schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+ }
+}
+
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+ if (set_clocks) {
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_uvd(adev, true);
+ } else {
+ amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_CG_STATE_UNGATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+ AMD_PG_STATE_UNGATE);
+ }
+ }
+}
+
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
+{
+ if (!amdgpu_sriov_vf(ring->adev))
+ schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_uvd_ring_test_ib - test ib execution
+ *
+ * @ring: amdgpu_ring pointer
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test if we can successfully execute an IB
+ */
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence;
+ long r;
+
+ r = amdgpu_uvd_get_create_msg(ring, 1, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ dma_fence_put(fence);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ if (r < 0)
+ goto error;
+
+ r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+ dma_fence_put(fence);
+
+error:
+ return r;
+}
+
+/**
+ * amdgpu_uvd_used_handles - returns used UVD handles
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the number of UVD handles in use
+ */
+uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
+{
+ unsigned int i;
+ uint32_t used_handles = 0;
+
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ /*
+ * Handles can be freed in any order, and not
+ * necessarily linear. So we need to count
+ * all non-zero handles.
+ */
+ if (atomic_read(&adev->uvd.handles[i]))
+ used_handles++;
+ }
+
+ return used_handles;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
new file mode 100644
index 000000000000..9dfad2f48ef4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_UVD_H__
+#define __AMDGPU_UVD_H__
+
+#define AMDGPU_DEFAULT_UVD_HANDLES 10
+#define AMDGPU_MAX_UVD_HANDLES 40
+#define AMDGPU_UVD_STACK_SIZE (200*1024)
+#define AMDGPU_UVD_HEAP_SIZE (256*1024)
+#define AMDGPU_UVD_SESSION_SIZE (50*1024)
+#define AMDGPU_UVD_FIRMWARE_OFFSET 256
+
+#define AMDGPU_MAX_UVD_INSTANCES 2
+
+#define AMDGPU_UVD_FIRMWARE_SIZE(adev) \
+ (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
+ 8) - AMDGPU_UVD_FIRMWARE_OFFSET)
+
+struct amdgpu_uvd_inst {
+ struct amdgpu_bo *vcpu_bo;
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ void *saved_bo;
+ struct amdgpu_ring ring;
+ struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
+ struct amdgpu_irq_src irq;
+ uint32_t srbm_soft_reset;
+};
+
+#define AMDGPU_UVD_HARVEST_UVD0 (1 << 0)
+#define AMDGPU_UVD_HARVEST_UVD1 (1 << 1)
+
+struct amdgpu_uvd {
+ const struct firmware *fw; /* UVD firmware */
+ unsigned fw_version;
+ unsigned max_handles;
+ unsigned num_enc_rings;
+ uint8_t num_uvd_inst;
+ bool address_64_bit;
+ bool use_ctx_buf;
+ struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
+ struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
+ atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
+ struct drm_sched_entity entity;
+ struct delayed_work idle_work;
+ unsigned harvest_config;
+	/* store image width to adjust nb memory pstate */
+ unsigned decode_image_width;
+ uint32_t keyselect;
+ struct amdgpu_bo *ib_bo;
+};
+
+int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
+int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev);
+int amdgpu_uvd_suspend(struct amdgpu_device *adev);
+int amdgpu_uvd_resume(struct amdgpu_device *adev);
+int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence);
+int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence);
+void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
+ struct drm_file *filp);
+int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev);
+
+#endif
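
With the constants above, the per-instance VCPU BO sized in amdgpu_uvd_sw_init() works out to AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE + 40 * AMDGPU_UVD_SESSION_SIZE = 200 KiB + 256 KiB + 2000 KiB = 2456 KiB (about 2.4 MiB) when the 40-handle maximum applies, plus the page-aligned firmware image when it is not loaded through PSP.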
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
new file mode 100644
index 000000000000..a7d8f1ce6ac2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -0,0 +1,1228 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include <drm/drm.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_cs.h"
+#include "cikd.h"
+
+/* 1 second timeout */
+#define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+/* Firmware Names */
+#ifdef CONFIG_DRM_AMDGPU_SI
+#define FIRMWARE_VCE_V1_0 "amdgpu/vce_1_0_0.bin"
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
+#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
+#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin"
+#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin"
+#define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin"
+#endif
+#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
+#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
+#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
+#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
+#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
+#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"
+#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin"
+
+#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
+#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
+#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin"
+
+#ifdef CONFIG_DRM_AMDGPU_SI
+MODULE_FIRMWARE(FIRMWARE_VCE_V1_0);
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+MODULE_FIRMWARE(FIRMWARE_BONAIRE);
+MODULE_FIRMWARE(FIRMWARE_KABINI);
+MODULE_FIRMWARE(FIRMWARE_KAVERI);
+MODULE_FIRMWARE(FIRMWARE_HAWAII);
+MODULE_FIRMWARE(FIRMWARE_MULLINS);
+#endif
+MODULE_FIRMWARE(FIRMWARE_TONGA);
+MODULE_FIRMWARE(FIRMWARE_CARRIZO);
+MODULE_FIRMWARE(FIRMWARE_FIJI);
+MODULE_FIRMWARE(FIRMWARE_STONEY);
+MODULE_FIRMWARE(FIRMWARE_POLARIS10);
+MODULE_FIRMWARE(FIRMWARE_POLARIS11);
+MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
+
+MODULE_FIRMWARE(FIRMWARE_VEGA10);
+MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
+
+static void amdgpu_vce_idle_work_handler(struct work_struct *work);
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence);
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence);
+
+/**
+ * amdgpu_vce_firmware_name() - determine the firmware file name for VCE
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Each chip that has VCE IP may need a different firmware.
+ * This function returns the name of the VCE firmware file
+ * appropriate for the current chip.
+ */
+static const char *amdgpu_vce_firmware_name(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_PITCAIRN:
+ case CHIP_TAHITI:
+ case CHIP_VERDE:
+ return FIRMWARE_VCE_V1_0;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ return FIRMWARE_BONAIRE;
+ case CHIP_KAVERI:
+ return FIRMWARE_KAVERI;
+ case CHIP_KABINI:
+ return FIRMWARE_KABINI;
+ case CHIP_HAWAII:
+ return FIRMWARE_HAWAII;
+ case CHIP_MULLINS:
+ return FIRMWARE_MULLINS;
+#endif
+ case CHIP_TONGA:
+ return FIRMWARE_TONGA;
+ case CHIP_CARRIZO:
+ return FIRMWARE_CARRIZO;
+ case CHIP_FIJI:
+ return FIRMWARE_FIJI;
+ case CHIP_STONEY:
+ return FIRMWARE_STONEY;
+ case CHIP_POLARIS10:
+ return FIRMWARE_POLARIS10;
+ case CHIP_POLARIS11:
+ return FIRMWARE_POLARIS11;
+ case CHIP_POLARIS12:
+ return FIRMWARE_POLARIS12;
+ case CHIP_VEGAM:
+ return FIRMWARE_VEGAM;
+ case CHIP_VEGA10:
+ return FIRMWARE_VEGA10;
+ case CHIP_VEGA12:
+ return FIRMWARE_VEGA12;
+ case CHIP_VEGA20:
+ return FIRMWARE_VEGA20;
+
+ default:
+ return NULL;
+ }
+}
+
+/**
+ * amdgpu_vce_early_init() - try to load VCE firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tries to load the VCE firmware.
+ *
+ * When the firmware is not found, returns -ENOENT so that the
+ * driver can still load and initialize the rest of the IP blocks.
+ * The GPU can function just fine without VCE, it will just
+ * not support video encoding.
+ */
+int amdgpu_vce_early_init(struct amdgpu_device *adev)
+{
+ const char *fw_name = amdgpu_vce_firmware_name(adev);
+ const struct common_firmware_header *hdr;
+ unsigned int ucode_version, version_major, version_minor, binary_id;
+ int r;
+
+ if (!fw_name)
+ return -ENOENT;
+
+ r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_vce: Firmware \"%s\" not found or failed to validate (%d)\n",
+ fw_name, r);
+
+ amdgpu_ucode_release(&adev->vce.fw);
+ return -ENOENT;
+ }
+
+ hdr = (const struct common_firmware_header *)adev->vce.fw->data;
+
+ ucode_version = le32_to_cpu(hdr->ucode_version);
+ version_major = (ucode_version >> 20) & 0xfff;
+ version_minor = (ucode_version >> 8) & 0xfff;
+ binary_id = ucode_version & 0xff;
+ dev_info(adev->dev, "Found VCE firmware Version: %d.%d Binary ID: %d\n",
+ version_major, version_minor, binary_id);
+ adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
+ (binary_id << 8));
+
+ return 0;
+}
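
As a worked decode of the packing above (the numbers are assumed for illustration): a header ucode_version of (52 << 20) | (4 << 8) | 3 yields version_major = 52, version_minor = 4 and binary_id = 3, is logged as "Version: 52.4 Binary ID: 3", and is repacked into adev->vce.fw_version as (52 << 24) | (4 << 16) | (3 << 8).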
+
+/**
+ * amdgpu_vce_sw_init() - allocate memory for VCE BO
+ *
+ * @adev: amdgpu_device pointer
+ * @size: size for the new BO
+ *
+ * First step to get VCE online: allocate memory for VCE BO.
+ * The VCE firmware binary is copied into the VCE BO later,
+ * in amdgpu_vce_resume. The VCE executes its code from the
+ * VCE BO and also uses the space in this BO for its stack and data.
+ *
+ * Ideally this BO should be placed in VRAM for optimal performance,
+ * although technically it also runs from system RAM (albeit slowly).
+ */
+int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
+{
+ int i, r;
+
+ if (!adev->vce.fw)
+ return -ENOENT;
+
+ r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vce.vcpu_bo,
+ &adev->vce.gpu_addr, &adev->vce.cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
+ return r;
+ }
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+ atomic_set(&adev->vce.handles[i], 0);
+ adev->vce.filp[i] = NULL;
+ }
+
+ INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
+ mutex_init(&adev->vce.idle_mutex);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_sw_fini - free memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Last step of VCE teardown, free firmware memory
+ */
+int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
+{
+ unsigned int i;
+
+ if (adev->vce.vcpu_bo == NULL)
+ return 0;
+
+ drm_sched_entity_destroy(&adev->vce.entity);
+
+ for (i = 0; i < adev->vce.num_rings; i++)
+ amdgpu_ring_fini(&adev->vce.ring[i]);
+
+ amdgpu_ucode_release(&adev->vce.fw);
+ mutex_destroy(&adev->vce.idle_mutex);
+
+ amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
+ (void **)&adev->vce.cpu_addr);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_entity_init - init entity
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
+ *
+ * Initialize the entity used for handle management in the kernel driver.
+ */
+int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (ring == &adev->vce.ring[0]) {
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ int r;
+
+ r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up VCE run queue.\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_suspend - unpin VCE fw memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_vce_suspend(struct amdgpu_device *adev)
+{
+ int i;
+
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
+ if (adev->vce.vcpu_bo == NULL)
+ return 0;
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+ if (atomic_read(&adev->vce.handles[i]))
+ break;
+
+ if (i == AMDGPU_MAX_VCE_HANDLES)
+ return 0;
+
+ /* TODO: suspending running encoding sessions isn't supported */
+ return -EINVAL;
+}
+
+/**
+ * amdgpu_vce_resume - pin VCE fw memory
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_vce_resume(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
+ int idx;
+
+ if (adev->vce.vcpu_bo == NULL)
+ return -EINVAL;
+
+ hdr = (const struct common_firmware_header *)adev->vce.fw->data;
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memset_io(adev->vce.cpu_addr, 0, amdgpu_bo_size(adev->vce.vcpu_bo));
+ memcpy_toio(adev->vce.cpu_addr, adev->vce.fw->data + offset,
+ adev->vce.fw->size - offset);
+ drm_dev_exit(idx);
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_idle_work_handler - power off VCE
+ *
+ * @work: pointer to work structure
+ *
+ * power off VCE when it's not used anymore
+ */
+static void amdgpu_vce_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vce.idle_work.work);
+ unsigned int i, count = 0;
+
+ for (i = 0; i < adev->vce.num_rings; i++)
+ count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
+
+ if (count == 0) {
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, false);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 0, 0);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_GATE);
+ }
+ } else {
+ schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
+ }
+}
+
+/**
+ * amdgpu_vce_ring_begin_use - power up VCE
+ *
+ * @ring: amdgpu ring
+ *
+ * Make sure VCE is powered up when we want to use it
+ */
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool set_clocks;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ mutex_lock(&adev->vce.idle_mutex);
+ set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
+ if (set_clocks) {
+ if (adev->pm.dpm_enabled) {
+ amdgpu_dpm_enable_vce(adev, true);
+ } else {
+ amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_CG_STATE_UNGATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_UNGATE);
+
+ }
+ }
+ mutex_unlock(&adev->vce.idle_mutex);
+}
+
+/**
+ * amdgpu_vce_ring_end_use - power VCE down
+ *
+ * @ring: amdgpu ring
+ *
+ * Schedule work to power VCE down again
+ */
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
+{
+ if (!amdgpu_sriov_vf(ring->adev))
+ schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_vce_free_handles - free still open VCE handles
+ *
+ * @adev: amdgpu_device pointer
+ * @filp: drm file pointer
+ *
+ * Close all VCE handles still open by this file pointer
+ */
+void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
+{
+ struct amdgpu_ring *ring = &adev->vce.ring[0];
+ int i, r;
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+ uint32_t handle = atomic_read(&adev->vce.handles[i]);
+
+ if (!handle || adev->vce.filp[i] != filp)
+ continue;
+
+ r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
+ if (r)
+ DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
+
+ adev->vce.filp[i] = NULL;
+ atomic_set(&adev->vce.handles[i], 0);
+ }
+}
+
+/**
+ * amdgpu_vce_required_gart_pages() - gets number of GART pages required by VCE
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns how many GART pages we need before GTT for the VCE IP block.
+ * For VCE1, see vce_v1_0_ensure_vcpu_bo_32bit_addr for details.
+ * For VCE2+, this is not needed so return zero.
+ */
+u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev)
+{
+ /* VCE IP block not added yet, so can't use amdgpu_ip_version */
+ if (adev->family == AMDGPU_FAMILY_SI)
+ return 512;
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_get_create_msg - generate a VCE create msg
+ *
+ * @ring: ring we should submit the msg to
+ * @handle: VCE session handle to use
+ * @fence: optional fence to return
+ *
+ * Open up a stream for HW test
+ */
+static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ const unsigned int ib_size_dw = 1024;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct amdgpu_ib ib_msg;
+ struct dma_fence *f = NULL;
+ uint64_t addr;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
+ if (r)
+ return r;
+
+ memset(&ib_msg, 0, sizeof(ib_msg));
+ /* only one gpu page is needed, alloc +1 page to make addr aligned. */
+ r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ &ib_msg);
+ if (r)
+ goto err;
+
+ ib = &job->ibs[0];
+ /* let addr point to page boundary */
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);
+
+ /* stitch together a VCE create msg */
+ ib->length_dw = 0;
+ ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
+ ib->ptr[ib->length_dw++] = handle;
+
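+ /* VCE firmware with encode major >= 52 (bits 31:24 of fw_version) expects a
+ * longer create message, hence the two different lengths below */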
+ if ((ring->adev->vce.fw_version >> 24) >= 52)
+ ib->ptr[ib->length_dw++] = 0x00000040; /* len */
+ else
+ ib->ptr[ib->length_dw++] = 0x00000030; /* len */
+ ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000042;
+ ib->ptr[ib->length_dw++] = 0x0000000a;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+ ib->ptr[ib->length_dw++] = 0x00000080;
+ ib->ptr[ib->length_dw++] = 0x00000060;
+ ib->ptr[ib->length_dw++] = 0x00000100;
+ ib->ptr[ib->length_dw++] = 0x00000100;
+ ib->ptr[ib->length_dw++] = 0x0000000c;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ if ((ring->adev->vce.fw_version >> 24) >= 52) {
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ }
+
+ ib->ptr[ib->length_dw++] = 0x00000014; /* len */
+ ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ amdgpu_ib_free(&ib_msg, f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
+ *
+ * @ring: ring we should submit the msg to
+ * @handle: VCE session handle to use
+ * @direct: direct or delayed pool
+ * @fence: optional fence to return
+ *
+ * Close up a stream for HW test or if userspace failed to do so
+ */
+static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ bool direct, struct dma_fence **fence)
+{
+ const unsigned int ib_size_dw = 1024;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ ib_size_dw * 4,
+ direct ? AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED, &job,
+ AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+
+ /* stitch together a VCE destroy msg */
+ ib->length_dw = 0;
+ ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
+ ib->ptr[ib->length_dw++] = handle;
+
+ ib->ptr[ib->length_dw++] = 0x00000020; /* len */
+ ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+ ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if there is none */
+ ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008; /* len */
+ ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ if (direct)
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ else
+ f = amdgpu_job_submit(job);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+/**
+ * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
+ *
+ * @p: cs parser
+ * @ib: indirect buffer to use
+ * @lo: address of lower dword
+ * @hi: address of higher dword
+ * @size: minimum size
+ * @index: bs/fb index
+ *
+ * Make sure that no BO crosses a 4GB boundary.
+ */
+static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p,
+ struct amdgpu_ib *ib, int lo, int hi,
+ unsigned int size, int32_t index)
+{
+ int64_t offset = ((uint64_t)size) * ((int64_t)index);
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va_mapping *mapping;
+ unsigned int i, fpfn, lpfn;
+ struct amdgpu_bo *bo;
+ uint64_t addr;
+ int r;
+
+ addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+ ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
+ if (index >= 0) {
+ addr += offset;
+ fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
+ lpfn = 0x100000000ULL >> PAGE_SHIFT;
+ } else {
+ fpfn = 0;
+ lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
+ }
+
+ r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
+ addr, lo, hi, size, index);
+ return r;
+ }
+
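+ /* restrict the allowed placement range so the buffer cannot cross the 4GB boundary */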
+ for (i = 0; i < bo->placement.num_placement; ++i) {
+ bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
+ bo->placements[i].lpfn = bo->placements[i].lpfn ?
+ min(bo->placements[i].lpfn, lpfn) : lpfn;
+ }
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+/**
+ * amdgpu_vce_cs_reloc - command submission relocation
+ *
+ * @p: parser context
+ * @ib: indirect buffer to use
+ * @lo: address of lower dword
+ * @hi: address of higher dword
+ * @size: minimum size
+ * @index: bs/fb index
+ *
+ * Patch relocation inside command stream with real buffer address
+ */
+static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib,
+ int lo, int hi, unsigned int size, uint32_t index)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ uint64_t addr;
+ int r;
+
+ if (index == 0xffffffff)
+ index = 0;
+
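+ /* rebuild the 64-bit address from the lo/hi dwords and add the per-index offset */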
+ addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) |
+ ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32;
+ addr += ((uint64_t)size) * ((uint64_t)index);
+
+ r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n",
+ addr, lo, hi, size, index);
+ return r;
+ }
+
+ if ((addr + (uint64_t)size) >
+ (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("BO too small for addr 0x%010llx %d %d\n",
+ addr, lo, hi);
+ return -EINVAL;
+ }
+
+ addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
+ addr += amdgpu_bo_gpu_offset(bo);
+ addr -= ((uint64_t)size) * ((uint64_t)index);
+
+ amdgpu_ib_set_value(ib, lo, lower_32_bits(addr));
+ amdgpu_ib_set_value(ib, hi, upper_32_bits(addr));
+
+ return 0;
+}
+
+/**
+ * amdgpu_vce_validate_handle - validate stream handle
+ *
+ * @p: parser context
+ * @handle: handle to validate
+ * @allocated: allocated a new handle?
+ *
+ * Validates the handle and returns the found session index, or -EINVAL if
+ * we don't have another free session index.
+ */
+static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
+ uint32_t handle, uint32_t *allocated)
+{
+ unsigned int i;
+
+ /* validate the handle */
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+ if (atomic_read(&p->adev->vce.handles[i]) == handle) {
+ if (p->adev->vce.filp[i] != p->filp) {
+ DRM_ERROR("VCE handle collision detected!\n");
+ return -EINVAL;
+ }
+ return i;
+ }
+ }
+
+ /* handle not found try to alloc a new one */
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
+ if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
+ p->adev->vce.filp[i] = p->filp;
+ p->adev->vce.img_size[i] = 0;
+ *allocated |= 1 << i;
+ return i;
+ }
+ }
+
+ DRM_ERROR("No more free VCE handles!\n");
+ return -EINVAL;
+}
+
+/**
+ * amdgpu_vce_ring_parse_cs - parse and validate the command stream
+ *
+ * @p: parser context
+ * @job: the job to parse
+ * @ib: the IB to patch
+ */
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ unsigned int fb_idx = 0, bs_idx = 0;
+ int session_idx = -1;
+ uint32_t destroyed = 0;
+ uint32_t created = 0;
+ uint32_t allocated = 0;
+ uint32_t tmp, handle = 0;
+ uint32_t dummy = 0xffffffff;
+ uint32_t *size = &dummy;
+ unsigned int idx;
+ int i, r = 0;
+
+ job->vm = NULL;
+
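+ /* first pass: validate the placement of every BO referenced by the command stream */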
+ for (idx = 0; idx < ib->length_dw;) {
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
+
+ if ((len < 8) || (len & 3)) {
+ DRM_ERROR("invalid VCE command length (%d)!\n", len);
+ r = -EINVAL;
+ goto out;
+ }
+
+ switch (cmd) {
+ case 0x00000002: /* task info */
+ fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+ bs_idx = amdgpu_ib_get_value(ib, idx + 7);
+ break;
+
+ case 0x03000001: /* encode */
+ r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9,
+ 0, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11,
+ 0, 0);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000001: /* context buffer */
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 0, 0);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000004: /* video bitstream buffer */
+ tmp = amdgpu_ib_get_value(ib, idx + 4);
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ tmp, bs_idx);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000005: /* feedback buffer */
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 4096, fb_idx);
+ if (r)
+ goto out;
+ break;
+
+ case 0x0500000d: /* MV buffer */
+ r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2,
+ 0, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7,
+ 0, 0);
+ if (r)
+ goto out;
+ break;
+ }
+
+ idx += len / 4;
+ }
+
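+ /* second pass: track session handles and patch the buffer relocations */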
+ for (idx = 0; idx < ib->length_dw;) {
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
+
+ switch (cmd) {
+ case 0x00000001: /* session */
+ handle = amdgpu_ib_get_value(ib, idx + 2);
+ session_idx = amdgpu_vce_validate_handle(p, handle,
+ &allocated);
+ if (session_idx < 0) {
+ r = session_idx;
+ goto out;
+ }
+ size = &p->adev->vce.img_size[session_idx];
+ break;
+
+ case 0x00000002: /* task info */
+ fb_idx = amdgpu_ib_get_value(ib, idx + 6);
+ bs_idx = amdgpu_ib_get_value(ib, idx + 7);
+ break;
+
+ case 0x01000001: /* create */
+ created |= 1 << session_idx;
+ if (destroyed & (1 << session_idx)) {
+ destroyed &= ~(1 << session_idx);
+ allocated |= 1 << session_idx;
+
+ } else if (!(allocated & (1 << session_idx))) {
+ DRM_ERROR("Handle already in use!\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ *size = amdgpu_ib_get_value(ib, idx + 8) *
+ amdgpu_ib_get_value(ib, idx + 10) *
+ 8 * 3 / 2;
+ break;
+
+ case 0x04000001: /* config extension */
+ case 0x04000002: /* pic control */
+ case 0x04000005: /* rate control */
+ case 0x04000007: /* motion estimation */
+ case 0x04000008: /* rdo */
+ case 0x04000009: /* vui */
+ case 0x05000002: /* auxiliary buffer */
+ case 0x05000009: /* clock table */
+ break;
+
+ case 0x0500000c: /* hw config */
+ switch (p->adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ case CHIP_MULLINS:
+#endif
+ case CHIP_CARRIZO:
+ break;
+ default:
+ r = -EINVAL;
+ goto out;
+ }
+ break;
+
+ case 0x03000001: /* encode */
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9,
+ *size, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11,
+ *size / 3, 0);
+ if (r)
+ goto out;
+ break;
+
+ case 0x02000001: /* destroy */
+ destroyed |= 1 << session_idx;
+ break;
+
+ case 0x05000001: /* context buffer */
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
+ *size * 2, 0);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000004: /* video bitstream buffer */
+ tmp = amdgpu_ib_get_value(ib, idx + 4);
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
+ tmp, bs_idx);
+ if (r)
+ goto out;
+ break;
+
+ case 0x05000005: /* feedback buffer */
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2,
+ 4096, fb_idx);
+ if (r)
+ goto out;
+ break;
+
+ case 0x0500000d: /* MV buffer */
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 3,
+ idx + 2, *size, 0);
+ if (r)
+ goto out;
+
+ r = amdgpu_vce_cs_reloc(p, ib, idx + 8,
+ idx + 7, *size / 12, 0);
+ if (r)
+ goto out;
+ break;
+
+ default:
+ DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (session_idx == -1) {
+ DRM_ERROR("no session command at start of IB\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ idx += len / 4;
+ }
+
+ if (allocated & ~created) {
+ DRM_ERROR("New session without create command!\n");
+ r = -ENOENT;
+ }
+
+out:
+ if (!r) {
+ /* No error, free all destroyed handle slots */
+ tmp = destroyed;
+ } else {
+ /* Error during parsing, free all allocated handle slots */
+ tmp = allocated;
+ }
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+ if (tmp & (1 << i))
+ atomic_set(&p->adev->vce.handles[i], 0);
+
+ return r;
+}
+
+/**
+ * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
+ *
+ * @p: parser context
+ * @job: the job to parse
+ * @ib: the IB to patch
+ */
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ int session_idx = -1;
+ uint32_t destroyed = 0;
+ uint32_t created = 0;
+ uint32_t allocated = 0;
+ uint32_t tmp, handle = 0;
+ int i, r = 0, idx = 0;
+
+ while (idx < ib->length_dw) {
+ uint32_t len = amdgpu_ib_get_value(ib, idx);
+ uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1);
+
+ if ((len < 8) || (len & 3)) {
+ DRM_ERROR("invalid VCE command length (%d)!\n", len);
+ r = -EINVAL;
+ goto out;
+ }
+
+ switch (cmd) {
+ case 0x00000001: /* session */
+ handle = amdgpu_ib_get_value(ib, idx + 2);
+ session_idx = amdgpu_vce_validate_handle(p, handle,
+ &allocated);
+ if (session_idx < 0) {
+ r = session_idx;
+ goto out;
+ }
+ break;
+
+ case 0x01000001: /* create */
+ created |= 1 << session_idx;
+ if (destroyed & (1 << session_idx)) {
+ destroyed &= ~(1 << session_idx);
+ allocated |= 1 << session_idx;
+
+ } else if (!(allocated & (1 << session_idx))) {
+ DRM_ERROR("Handle already in use!\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ break;
+
+ case 0x02000001: /* destroy */
+ destroyed |= 1 << session_idx;
+ break;
+
+ default:
+ break;
+ }
+
+ if (session_idx == -1) {
+ DRM_ERROR("no session command at start of IB\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ idx += len / 4;
+ }
+
+ if (allocated & ~created) {
+ DRM_ERROR("New session without create command!\n");
+ r = -ENOENT;
+ }
+
+out:
+ if (!r) {
+ /* No error, free all destroyed handle slots */
+ tmp = destroyed;
+ } else {
+ /* Error during parsing, free all allocated handle slots */
+ tmp = allocated;
+ }
+
+ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+ if (tmp & (1 << i))
+ atomic_set(&p->adev->vce.handles[i], 0);
+
+ return r;
+}
+
+/**
+ * amdgpu_vce_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: engine to use
+ * @job: job to retrieve vmid from
+ * @ib: the IB to execute
+ * @flags: unused
+ *
+ */
+void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ amdgpu_ring_write(ring, VCE_CMD_IB);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+/**
+ * amdgpu_vce_ring_emit_fence - add a fence command to the ring
+ *
+ * @ring: engine to use
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ */
+void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned int flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCE_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCE_CMD_TRAP);
+ amdgpu_ring_write(ring, VCE_CMD_END);
+}
+
+/**
+ * amdgpu_vce_ring_test_ring - test if VCE ring is working
+ *
+ * @ring: the engine to test on
+ *
+ */
+int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t rptr;
+ unsigned int i;
+ int r, timeout = adev->usec_timeout;
+
+ /* skip ring test for SRIOV */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 16);
+ if (r)
+ return r;
+
+ rptr = amdgpu_ring_get_rptr(ring);
+
+ amdgpu_ring_write(ring, VCE_CMD_END);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < timeout; i++) {
+ if (amdgpu_ring_get_rptr(ring) != rptr)
+ break;
+ udelay(1);
+ }
+
+ if (i >= timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+/**
+ * amdgpu_vce_ring_test_ib - test if VCE IBs are working
+ *
+ * @ring: the engine to test on
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ */
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence = NULL;
+ long r;
+
+ /* skip vce ring1/2 ib test for now, since it's not reliable */
+ if (ring != &ring->adev->vce.ring[0])
+ return 0;
+
+ r = amdgpu_vce_get_create_msg(ring, 1, NULL);
+ if (r)
+ goto error;
+
+ r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+error:
+ dma_fence_put(fence);
+ return r;
+}
+
+enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
+{
+ switch (ring) {
+ case 0:
+ return AMDGPU_RING_PRIO_0;
+ case 1:
+ return AMDGPU_RING_PRIO_1;
+ case 2:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
new file mode 100644
index 000000000000..1c3464ce5037
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VCE_H__
+#define __AMDGPU_VCE_H__
+
+#define AMDGPU_MAX_VCE_HANDLES 16
+#define AMDGPU_VCE_FIRMWARE_OFFSET 256
+
+#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
+#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
+
+#define AMDGPU_VCE_FW_53_45 ((53 << 24) | (45 << 16))
+
+struct amdgpu_vce {
+ struct amdgpu_bo *vcpu_bo;
+ uint64_t gpu_addr;
+ void *cpu_addr;
+ void *saved_bo;
+ unsigned fw_version;
+ unsigned fb_version;
+ atomic_t handles[AMDGPU_MAX_VCE_HANDLES];
+ struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES];
+ uint32_t img_size[AMDGPU_MAX_VCE_HANDLES];
+ struct delayed_work idle_work;
+ struct mutex idle_mutex;
+ const struct firmware *fw; /* VCE firmware */
+ struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
+ struct amdgpu_irq_src irq;
+ unsigned harvest_config;
+ struct drm_sched_entity entity;
+ uint32_t srbm_soft_reset;
+ unsigned num_rings;
+ uint32_t keyselect;
+};
+
+int amdgpu_vce_early_init(struct amdgpu_device *adev);
+int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
+int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
+int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+int amdgpu_vce_suspend(struct amdgpu_device *adev);
+int amdgpu_vce_resume(struct amdgpu_device *adev);
+void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
+u32 amdgpu_vce_required_gart_pages(struct amdgpu_device *adev);
+int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
+void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ struct amdgpu_ib *ib, uint32_t flags);
+void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags);
+int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring);
+unsigned amdgpu_vce_ring_get_emit_ib_size(struct amdgpu_ring *ring);
+unsigned amdgpu_vce_ring_get_dma_frame_size(struct amdgpu_ring *ring);
+enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
new file mode 100644
index 000000000000..5e0786ea911b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -0,0 +1,1637 @@
+/*
+ * Copyright 2016-2024 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_vcn.h"
+#include "soc15d.h"
+
+/* Firmware Names */
+#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
+#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin"
+#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin"
+#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin"
+#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
+#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin"
+#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin"
+#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin"
+#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin"
+#define FIRMWARE_VANGOGH "amdgpu/vangogh_vcn.bin"
+#define FIRMWARE_DIMGREY_CAVEFISH "amdgpu/dimgrey_cavefish_vcn.bin"
+#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
+#define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin"
+#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin"
+#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
+#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
+#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
+#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin"
+#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
+#define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin"
+#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin"
+#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
+#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"
+#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin"
+
+MODULE_FIRMWARE(FIRMWARE_RAVEN);
+MODULE_FIRMWARE(FIRMWARE_PICASSO);
+MODULE_FIRMWARE(FIRMWARE_RAVEN2);
+MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
+MODULE_FIRMWARE(FIRMWARE_RENOIR);
+MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
+MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
+MODULE_FIRMWARE(FIRMWARE_NAVI10);
+MODULE_FIRMWARE(FIRMWARE_NAVI14);
+MODULE_FIRMWARE(FIRMWARE_NAVI12);
+MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
+MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
+MODULE_FIRMWARE(FIRMWARE_VANGOGH);
+MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
+MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
+MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
+MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
+
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
+static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev);
+
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
+{
+ char ucode_prefix[25];
+ int r;
+
+ adev->vcn.inst[i].adev = adev;
+ adev->vcn.inst[i].inst = i;
+ amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ if (i != 0 && adev->vcn.per_inst_fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_%d.bin", ucode_prefix, i);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ if (!adev->vcn.inst[0].fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ } else {
+ r = 0;
+ }
+ adev->vcn.inst[i].fw = adev->vcn.inst[0].fw;
+ }
+
+ return r;
+}
+
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
+{
+ unsigned long bo_size;
+ const struct common_firmware_header *hdr;
+ unsigned char fw_check;
+ unsigned int fw_shared_size, log_offset;
+ int r;
+
+ mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
+ mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_init(&adev->vcn.inst[i].engine_reset_mutex);
+ atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
+ INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.inst[i].indirect_sram = true;
+
+ /*
+ * Some Steam Deck BIOS versions are incompatible with the
+ * indirect SRAM mode, leading to amdgpu being unable to get
+ * properly probed (and even potentially crashing the kernel).
+ * Hence, check for these versions here - notice this is
+ * restricted to Vangogh (Deck's APU).
+ */
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 0, 2)) {
+ const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+ if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
+ !strncmp("F7A0114", bios_ver, 7))) {
+ adev->vcn.inst[i].indirect_sram = false;
+ dev_info(adev->dev,
+ "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+ }
+ }
+
+ /* from vcn4 and above, only unified queue is used */
+ adev->vcn.inst[i].using_unified_queue =
+ amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version);
+ adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
+
+ /* Bits 20-23 hold the encode major version and are non-zero for the new naming
+ * convention. This field is part of version minor and DRM_DISABLED_FLAG in the
+ * old naming convention. Since the latest version minor is 0x5B and
+ * DRM_DISABLED_FLAG is zero in the old naming convention, this field is always
+ * zero so far. These four bits are used to tell which naming convention is present.
+ */
+ fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
+ if (fw_check) {
+ unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;
+
+ fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
+ enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
+ enc_major = fw_check;
+ dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
+ vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
+ dev_info(adev->dev,
+ "[VCN instance %d] Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
+ i, enc_major, enc_minor, dec_ver, vep, fw_rev);
+ } else {
+ unsigned int version_major, version_minor, family_id;
+
+ family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
+ version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+ version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ dev_info(adev->dev, "[VCN instance %d] Found VCN firmware Version: %u.%u Family ID: %u\n",
+ i, version_major, version_minor, family_id);
+ }
+
+ bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log);
+ } else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
+ } else {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
+ log_offset = offsetof(struct amdgpu_fw_shared, fw_log);
+ }
+
+ bo_size += fw_shared_size;
+
+ if (amdgpu_vcnfw_log)
+ bo_size += AMDGPU_VCNFW_LOG_SIZE;
+
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ &adev->vcn.inst[i].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ return r;
+ }
+
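+ /* the fw_shared area occupies the tail of the VCPU BO; when fw logging is
+ * enabled, the log buffer is placed directly behind it at the very end */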
+ adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
+ bo_size - fw_shared_size;
+ adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
+ bo_size - fw_shared_size;
+
+ adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
+
+ if (amdgpu_vcnfw_log) {
+ adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.log_offset = log_offset;
+ }
+
+ if (adev->vcn.inst[i].indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ &adev->vcn.inst[i].dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
+{
+ int j;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+
+ amdgpu_bo_free_kernel(
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr);
+
+ kvfree(adev->vcn.inst[i].saved_bo);
+
+ amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ (void **)&adev->vcn.inst[i].cpu_addr);
+
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec);
+
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]);
+
+ if (adev->vcn.per_inst_fw) {
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ adev->vcn.inst[i].fw = NULL;
+ }
+
+ if (adev->vcn.reg_list)
+ amdgpu_vcn_reg_dump_fini(adev);
+
+ mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
+}
+
+bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
+{
+ bool ret = false;
+ int vcn_config = adev->vcn.inst[vcn_instance].vcn_config;
+
+ if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
+ ret = true;
+ else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK))
+ ret = true;
+ else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK))
+ ret = true;
+
+ return ret;
+}
+
+static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i)
+{
+ unsigned int size;
+ void *ptr;
+ int idx;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return 0;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
+
+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vcn.inst[i].saved_bo)
+ return -ENOMEM;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+ drm_dev_exit(idx);
+ }
+
+ return 0;
+}
+
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
+{
+ int ret, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)
+{
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ /* err_event_athub and dpc recovery will corrupt the VCPU buffer, so we need to
+ * restore fw data and clear the buffer in amdgpu_vcn_resume().
+ */
+ if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset)
+ return 0;
+
+ return amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+}
+
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i)
+{
+ unsigned int size;
+ void *ptr;
+ int idx;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
+
+ if (adev->vcn.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->vcn.inst[i].saved_bo);
+ adev->vcn.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(adev->vcn.inst[i].cpu_addr,
+ adev->vcn.inst[i].fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ drm_dev_exit(idx);
+ }
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
+ memset_io(ptr, 0, size);
+ }
+
+ return 0;
+}
+
+void amdgpu_vcn_get_profile(struct amdgpu_device *adev)
+{
+ int r;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+
+ if (adev->vcn.workload_profile_active) {
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+ return;
+ }
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ true);
+ if (r)
+ dev_warn(adev->dev,
+ "(%d) failed to enable video power profile mode\n", r);
+ else
+ adev->vcn.workload_profile_active = true;
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+}
+
+void amdgpu_vcn_put_profile(struct amdgpu_device *adev)
+{
+ bool pg = true;
+ int r, i;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.inst[i].cur_state != AMD_PG_STATE_GATE) {
+ pg = false;
+ break;
+ }
+ }
+
+ if (pg) {
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ false);
+ if (r)
+ dev_warn(adev->dev,
+ "(%d) failed to disable video power profile mode\n", r);
+ else
+ adev->vcn.workload_profile_active = false;
+ }
+
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+}
+
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i = vcn_inst->inst, j;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !adev->vcn.inst[i].using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (fence[i] ||
+ unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state);
+ }
+
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
+ fences += fence[i];
+
+ if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
+ amdgpu_vcn_put_profile(adev);
+
+ } else {
+ schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
+ }
+}
+
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me];
+
+ atomic_inc(&vcn_inst->total_submission_cnt);
+
+ cancel_delayed_work_sync(&vcn_inst->idle_work);
+
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !vcn_inst->using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+ atomic_inc(&vcn_inst->dpg_enc_submission_cnt);
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ } else {
+ unsigned int fences = 0;
+ unsigned int i;
+
+ for (i = 0; i < vcn_inst->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]);
+
+ if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ }
+
+ vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
+ }
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
+ amdgpu_vcn_get_profile(adev);
+}
+
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
+ !adev->vcn.inst[ring->me].using_unified_queue)
+ atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+ atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
+
+ schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
+ VCN_IDLE_TIMEOUT);
+}
+
+int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned int i;
+ int r;
+
+ /* VCN in SRIOV does not support direct register read/write */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r)
+ return r;
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t rptr;
+ unsigned int i;
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 16);
+ if (r)
+ return r;
+
+ rptr = amdgpu_ring_get_rptr(ring);
+
+ amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (amdgpu_ring_get_rptr(ring) != rptr)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
+{
+ u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ 64, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
+ if (r)
+ goto err;
+
+ ib = &job->ibs[0];
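+ /* pass the message address to the decoder through the data0/data1 registers,
+ * issue the command and pad the remaining dwords with NOPs */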
+ ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0);
+ ib->ptr[1] = addr;
+ ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0);
+ ib->ptr[3] = addr >> 32;
+ ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0);
+ ib->ptr[5] = 0;
+ for (i = 6; i < 16; i += 2) {
+ ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0);
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err_free;
+
+ amdgpu_ib_free(ib_msg, f);
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err_free:
+ amdgpu_job_free(job);
+err:
+ amdgpu_ib_free(ib_msg, f);
+ return r;
+}
+
+static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *msg;
+ int r, i;
+
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ ib);
+ if (r)
+ return r;
+
+ msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
+ msg[0] = cpu_to_le32(0x00000028);
+ msg[1] = cpu_to_le32(0x00000038);
+ msg[2] = cpu_to_le32(0x00000001);
+ msg[3] = cpu_to_le32(0x00000000);
+ msg[4] = cpu_to_le32(handle);
+ msg[5] = cpu_to_le32(0x00000000);
+ msg[6] = cpu_to_le32(0x00000001);
+ msg[7] = cpu_to_le32(0x00000028);
+ msg[8] = cpu_to_le32(0x00000010);
+ msg[9] = cpu_to_le32(0x00000000);
+ msg[10] = cpu_to_le32(0x00000007);
+ msg[11] = cpu_to_le32(0x00000000);
+ msg[12] = cpu_to_le32(0x00000780);
+ msg[13] = cpu_to_le32(0x00000440);
+ for (i = 14; i < 1024; ++i)
+ msg[i] = cpu_to_le32(0x0);
+
+ return 0;
+}
+
+static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_ib *ib)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t *msg;
+ int r, i;
+
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
+ AMDGPU_IB_POOL_DIRECT,
+ ib);
+ if (r)
+ return r;
+
+ msg = (uint32_t *)AMDGPU_GPU_PAGE_ALIGN((unsigned long)ib->ptr);
+ msg[0] = cpu_to_le32(0x00000028);
+ msg[1] = cpu_to_le32(0x00000018);
+ msg[2] = cpu_to_le32(0x00000000);
+ msg[3] = cpu_to_le32(0x00000002);
+ msg[4] = cpu_to_le32(handle);
+ msg[5] = cpu_to_le32(0x00000000);
+ for (i = 6; i < 1024; ++i)
+ msg[i] = cpu_to_le32(0x0);
+
+ return 0;
+}
+
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence = NULL;
+ struct amdgpu_ib ib;
+ long r;
+
+ r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_send_msg(ring, &ib, NULL);
+ if (r)
+ goto error;
+ r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_send_msg(ring, &ib, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+ dma_fence_put(fence);
+error:
+ return r;
+}
+
+static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
+ uint32_t ib_pack_in_dw, bool enc)
+{
+ uint32_t *ib_checksum;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
+ ib->ptr[ib->length_dw++] = 0x30000002;
+ ib_checksum = &ib->ptr[ib->length_dw++];
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
+ ib->ptr[ib->length_dw++] = 0x30000001;
+ ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);
+
+ return ib_checksum;
+}
+
+static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
+ uint32_t ib_pack_in_dw)
+{
+ uint32_t i;
+ uint32_t checksum = 0;
+
+ for (i = 0; i < ib_pack_in_dw; i++)
+ checksum += *(*ib_checksum + 2 + i);
+
+ **ib_checksum = checksum;
+}
+
+static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
+{
+ struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+ unsigned int ib_size_dw = 64;
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+ uint32_t *ib_checksum;
+ uint32_t ib_pack_in_dw;
+ int i, r;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
+ if (r)
+ goto err;
+
+ ib = &job->ibs[0];
+ ib->length_dw = 0;
+
+ /* single queue headers */
+ if (adev->vcn.inst[ring->me].using_unified_queue) {
+ ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ + 4 + 2; /* engine info + decoding ib in dw */
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
+ }
+
+ ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
+ ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
+ ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
+ memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));
+
+ decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
+ decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
+ decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err_free;
+
+ amdgpu_ib_free(ib_msg, f);
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err_free:
+ amdgpu_job_free(job);
+err:
+ amdgpu_ib_free(ib_msg, f);
+ return r;
+}
+
+int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct dma_fence *fence = NULL;
+ struct amdgpu_ib ib;
+ long r;
+
+ r = amdgpu_vcn_dec_get_create_msg(ring, 1, &ib);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, NULL);
+ if (r)
+ goto error;
+ r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &ib);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_sw_send_msg(ring, &ib, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+ dma_fence_put(fence);
+error:
+ return r;
+}
+
+int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t rptr;
+ unsigned int i;
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ r = amdgpu_ring_alloc(ring, 16);
+ if (r)
+ return r;
+
+ rptr = amdgpu_ring_get_rptr(ring);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_END);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (amdgpu_ring_get_rptr(ring) != rptr)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
+static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
+{
+ unsigned int ib_size_dw = 16;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
+ uint64_t addr;
+ int i, r;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+
+ ib->length_dw = 0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
+ ib->ptr[ib->length_dw++] = 0x00000018;
+ ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
+ ib->ptr[ib->length_dw++] = handle;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000014;
+ ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+ ib->ptr[ib->length_dw++] = 0x0000001c;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008;
+ ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_ib *ib_msg,
+ struct dma_fence **fence)
+{
+ unsigned int ib_size_dw = 16;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
+ uint64_t addr;
+ int i, r;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_size_dw += 8;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+ ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+ &job, AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+ addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+
+ ib->length_dw = 0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
+ ib->ptr[ib->length_dw++] = 0x00000018;
+ ib->ptr[ib->length_dw++] = 0x00000001;
+ ib->ptr[ib->length_dw++] = handle;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000014;
+ ib->ptr[ib->length_dw++] = 0x00000002;
+ ib->ptr[ib->length_dw++] = 0x0000001c;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+ ib->ptr[ib->length_dw++] = 0x00000000;
+
+ ib->ptr[ib->length_dw++] = 0x00000008;
+ ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */
+
+ for (i = ib->length_dw; i < ib_size_dw; ++i)
+ ib->ptr[i] = 0x0;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *fence = NULL;
+ struct amdgpu_ib ib;
+ long r;
+
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, (128 << 10) + AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_IB_POOL_DIRECT,
+ &ib);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_enc_get_create_msg(ring, 1, &ib, NULL);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &ib, &fence);
+ if (r)
+ goto error;
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0)
+ r = -ETIMEDOUT;
+ else if (r > 0)
+ r = 0;
+
+error:
+ amdgpu_ib_free(&ib, fence);
+ dma_fence_put(fence);
+
+ return r;
+}
+
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ long r;
+
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) &&
+ (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) {
+ r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
+ if (r)
+ goto error;
+ }
+
+ r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
+
+error:
+ return r;
+}
+
+enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
+{
+ switch (ring) {
+ case 0:
+ return AMDGPU_RING_PRIO_0;
+ case 1:
+ return AMDGPU_RING_PRIO_1;
+ case 2:
+ return AMDGPU_RING_PRIO_2;
+ default:
+ return AMDGPU_RING_PRIO_0;
+ }
+}
+
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i)
+{
+ unsigned int idx;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1))
+ && (i > 0))
+ return;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ /* currently only 2 FW instances are supported */
+ if (i >= 2) {
+ dev_info(adev->dev, "More than 2 VCN FW instances!\n");
+ return;
+ }
+ idx = AMDGPU_UCODE_ID_VCN + i;
+ adev->firmware.ucode[idx].ucode_id = idx;
+ adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+}
+
+/*
+ * debugfs for mapping vcn firmware log buffer.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_vcn_inst *vcn;
+ void *log_buf;
+ struct amdgpu_vcn_fwlog *plog;
+ unsigned int read_pos, write_pos, available, i, read_bytes = 0;
+ unsigned int read_num[2] = {0};
+
+ vcn = file_inode(f)->i_private;
+ if (!vcn)
+ return -ENODEV;
+
+ if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log)
+ return -EFAULT;
+
+ log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
+
+ plog = (struct amdgpu_vcn_fwlog *)log_buf;
+ read_pos = plog->rptr;
+ write_pos = plog->wptr;
+
+ if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE)
+ return -EFAULT;
+
+ if (!size || (read_pos == write_pos))
+ return 0;
+
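+ /*
+ * The log is a ring buffer that lives after the header: when the write
+ * pointer has wrapped around behind the read pointer, the data must be
+ * copied out in two chunks, first from rptr to the end of the buffer,
+ * then from just past the header up to wptr.
+ */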
+ if (write_pos > read_pos) {
+ available = write_pos - read_pos;
+ read_num[0] = min_t(size_t, size, available);
+ } else {
+ read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos;
+ available = read_num[0] + write_pos - plog->header_size;
+ if (size > available)
+ read_num[1] = write_pos - plog->header_size;
+ else if (size > read_num[0])
+ read_num[1] = size - read_num[0];
+ else
+ read_num[0] = size;
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (read_num[i]) {
+ if (read_pos == AMDGPU_VCNFW_LOG_SIZE)
+ read_pos = plog->header_size;
+ if (read_num[i] == copy_to_user((buf + read_bytes),
+ (log_buf + read_pos), read_num[i]))
+ return -EFAULT;
+
+ read_bytes += read_num[i];
+ read_pos += read_num[i];
+ }
+ }
+
+ plog->rptr = read_pos;
+ *pos += read_bytes;
+ return read_bytes;
+}
+
+static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_vcn_fwlog_read,
+ .llseek = default_llseek
+};
+#endif
+
+void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i,
+ struct amdgpu_vcn_inst *vcn)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ sprintf(name, "amdgpu_vcn_%d_fwlog", i);
+ debugfs_create_file_size(name, S_IFREG | 0444, root, vcn,
+ &amdgpu_debugfs_vcnfwlog_fops,
+ AMDGPU_VCNFW_LOG_SIZE);
+#endif
+}
+
+void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn)
+{
+#if defined(CONFIG_DEBUG_FS)
+ uint32_t *flag = vcn->fw_shared.cpu_addr;
+ void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size;
+ uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size;
+ struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr;
+ struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr
+ + vcn->fw_shared.log_offset;
+ *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG);
+ fw_log->is_enabled = 1;
+ fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF);
+ fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32);
+ fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE);
+
+ log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog);
+ log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE;
+ log_buf->rptr = log_buf->header_size;
+ log_buf->wptr = log_buf->header_size;
+ log_buf->wrapped = 0;
+#endif
+}
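For context on how the firmware log set up above can be consumed, here is a minimal user-space sketch; it assumes debugfs is mounted at /sys/kernel/debug and that the device is DRM minor 0, both of which are system-dependent assumptions rather than driver guarantees.

	/* Hypothetical user-space reader for the VCN firmware log created by
	 * amdgpu_debugfs_vcn_fwlog_init(); the debugfs path is an assumption. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd = open("/sys/kernel/debug/dri/0/amdgpu_vcn_0_fwlog", O_RDONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Each read drains whatever the firmware wrote since the last read. */
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);
		close(fd);
		return 0;
	}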
+
+int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_common_if *ras_if = adev->vcn.ras_if;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ if (!ras_if)
+ return 0;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev, ras_if->block);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV for VCN!\n");
+ }
+
+ return 0;
+}
+
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r, i;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i) ||
+ !adev->vcn.inst[i].ras_poison_irq.funcs)
+ continue;
+
+ r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+ }
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+ return r;
+}
+
+int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_vcn_ras *ras;
+
+ if (!adev->vcn.ras)
+ return 0;
+
+ ras = adev->vcn.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register vcn ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "vcn");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->vcn.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init;
+
+ return 0;
+}
+
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = (ucode_id ? ucode_id :
+ (inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
+ AMDGPU_UCODE_ID_VCN0_RAM)),
+ .mc_addr = adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+ .ucode_size = ((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr),
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
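As a rough sketch of what feeds amdgpu_vcn_psp_update_sram(): in DPG indirect mode every register write is buffered as an (offset, value) pair in the instance's DPG SRAM (see the indirect branch of WREG32_SOC15_DPG_MODE in amdgpu_vcn.h below), and the ucode_size handed to the PSP is simply how far the write cursor has advanced. The model below only illustrates that bookkeeping; the struct and helper names are hypothetical, not the driver's.

	/* Illustrative model of DPG indirect register programming: writes are
	 * queued as (offset, value) pairs and the payload size is the distance
	 * the cursor moved. Names are hypothetical. */
	#include <stddef.h>
	#include <stdint.h>

	struct dpg_sram_model {
		uint32_t buf[256];	/* stands in for dpg_sram_cpu_addr */
		uint32_t *curr;		/* stands in for dpg_sram_curr_addr */
	};

	static void dpg_sram_queue_write(struct dpg_sram_model *s,
					 uint32_t reg_offset, uint32_t value)
	{
		*s->curr++ = reg_offset;	/* same pattern as the indirect branch */
		*s->curr++ = value;		/* of WREG32_SOC15_DPG_MODE */
	}

	static size_t dpg_sram_payload_bytes(const struct dpg_sram_model *s)
	{
		/* mirrors the ucode_size computation above */
		return (uintptr_t)s->curr - (uintptr_t)s->buf;
	}

	int main(void)
	{
		struct dpg_sram_model s = { .curr = s.buf };

		dpg_sram_queue_write(&s, 0x1c00, 0x1);
		return dpg_sram_payload_bytes(&s) == 8 ? 0 : 1;
	}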
+
+static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset);
+}
+
+static DEVICE_ATTR(vcn_reset_mask, 0444,
+ amdgpu_get_vcn_reset_mask, NULL);
+
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->vcn.num_vcn_inst) {
+ r = device_create_file(adev->dev, &dev_attr_vcn_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->vcn.num_vcn_inst)
+ device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
+ }
+}
+
+/*
+ * debugfs to enable/disable vcn job submission to specific core or
+ * instance. It is created only if the queue type is unified.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->vcn.num_vcn_inst) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (val & (1ULL << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ /* publish the sched.ready flag updates so they take effect immediately across SMP */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops,
+ amdgpu_debugfs_vcn_sched_mask_get,
+ amdgpu_debugfs_vcn_sched_mask_set, "%llx\n");
+#endif
+
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue)
+ return;
+ sprintf(name, "amdgpu_vcn_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_vcn_sched_mask_fops);
+#endif
+}
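To show how the mask above might be exercised, here is a hedged user-space snippet; the debugfs path and DRM minor number are assumptions, and the file only exists on parts with more than one VCN instance using the unified queue.

	/* Hypothetical user-space snippet: restrict VCN job submission to
	 * instance 0 by writing a one-bit mask (path is an assumption). */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/dri/0/amdgpu_vcn_sched_mask", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "0x1\n");	/* bit i enables instance i's unified ring */
		return fclose(f) ? 1 : 0;
	}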
+
+/**
+ * vcn_set_powergating_state - set VCN block powergating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ ret |= vinst->set_pg_state(vinst, state);
+ }
+
+ return ret;
+}
+
+/**
+ * amdgpu_vcn_reset_engine - Reset a specific VCN engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: VCN engine instance to reset
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev,
+ uint32_t instance_id)
+{
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id];
+ int r, i;
+
+ mutex_lock(&vinst->engine_reset_mutex);
+ /* Stop the scheduler's work queue for the dec and enc rings if they are running.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ drm_sched_wqueue_stop(&vinst->ring_dec.sched);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ drm_sched_wqueue_stop(&vinst->ring_enc[i].sched);
+
+ /* Perform the VCN reset for the specified instance */
+ r = vinst->reset(vinst);
+ if (r)
+ goto unlock;
+ r = amdgpu_ring_test_ring(&vinst->ring_dec);
+ if (r)
+ goto unlock;
+ for (i = 0; i < vinst->num_enc_rings; i++) {
+ r = amdgpu_ring_test_ring(&vinst->ring_enc[i]);
+ if (r)
+ goto unlock;
+ }
+ amdgpu_fence_driver_force_completion(&vinst->ring_dec);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ amdgpu_fence_driver_force_completion(&vinst->ring_enc[i]);
+
+ /* Restart the scheduler's work queue for the dec and enc rings
+ * if they were stopped by this function. This allows new tasks
+ * to be submitted to the queues after the reset is complete.
+ */
+ drm_sched_wqueue_start(&vinst->ring_dec.sched);
+ for (i = 0; i < vinst->num_enc_rings; i++)
+ drm_sched_wqueue_start(&vinst->ring_enc[i].sched);
+
+unlock:
+ mutex_unlock(&vinst->engine_reset_mutex);
+
+ return r;
+}
+
+/**
+ * amdgpu_vcn_ring_reset - Reset a VCN ring
+ * @ring: ring to reset
+ * @vmid: vmid of guilty job
+ * @timedout_fence: fence of timed out job
+ *
+ * This helper is for VCN blocks without unified queues because
+ * resetting the engine resets all queues in that case. With
+ * unified queues we have one queue per engine.
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (adev->vcn.inst[ring->me].using_unified_queue)
+ return -EINVAL;
+
+ return amdgpu_vcn_reset_engine(adev, ring->me);
+}
+
+int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+ adev->vcn.ip_dump = kcalloc(adev->vcn.num_vcn_inst * count,
+ sizeof(uint32_t), GFP_KERNEL);
+ if (!adev->vcn.ip_dump)
+ return -ENOMEM;
+ adev->vcn.reg_list = reg;
+ adev->vcn.reg_count = count;
+
+ return 0;
+}
+
+static void amdgpu_vcn_reg_dump_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->vcn.ip_dump);
+ adev->vcn.ip_dump = NULL;
+ adev->vcn.reg_list = NULL;
+ adev->vcn.reg_count = 0;
+}
+
+void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ u32 inst_off;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ inst_off = i * adev->vcn.reg_count;
+ /* mmUVD_POWER_STATUS is always readable and is the first in reg_list */
+ adev->vcn.ip_dump[inst_off] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[0], i));
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) !=
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+
+ if (is_powered)
+ for (j = 1; j < adev->vcn.reg_count; j++)
+ adev->vcn.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->vcn.reg_list[j], i));
+ }
+}
+
+void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, j;
+ bool is_powered;
+ u32 inst_off;
+
+ if (!adev->vcn.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->vcn.num_vcn_inst);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (adev->vcn.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:VCN%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * adev->vcn.reg_count;
+ is_powered = (adev->vcn.ip_dump[inst_off] &
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF) !=
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+
+ if (is_powered) {
+ drm_printf(p, "\nActive Instance:VCN%d\n", i);
+ for (j = 0; j < adev->vcn.reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", adev->vcn.reg_list[j].reg_name,
+ adev->vcn.ip_dump[inst_off + j]);
+ } else {
+ drm_printf(p, "\nInactive Instance:VCN%d\n", i);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
new file mode 100644
index 000000000000..82624b44e661
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -0,0 +1,573 @@
+/*
+ * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VCN_H__
+#define __AMDGPU_VCN_H__
+
+#include "amdgpu_ras.h"
+
+#define AMDGPU_VCN_STACK_SIZE (128*1024)
+#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
+
+#define AMDGPU_VCN_FIRMWARE_OFFSET 256
+#define AMDGPU_VCN_MAX_ENC_RINGS 3
+
+#define AMDGPU_MAX_VCN_INSTANCES 4
+#define AMDGPU_MAX_VCN_ENC_RINGS (AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES)
+
+#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
+#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
+
+#define VCN_DEC_KMD_CMD 0x80000000
+#define VCN_DEC_CMD_FENCE 0x00000000
+#define VCN_DEC_CMD_TRAP 0x00000001
+#define VCN_DEC_CMD_WRITE_REG 0x00000004
+#define VCN_DEC_CMD_REG_READ_COND_WAIT 0x00000006
+#define VCN_DEC_CMD_PACKET_START 0x0000000a
+#define VCN_DEC_CMD_PACKET_END 0x0000000b
+
+#define VCN_DEC_SW_CMD_NO_OP 0x00000000
+#define VCN_DEC_SW_CMD_END 0x00000001
+#define VCN_DEC_SW_CMD_IB 0x00000002
+#define VCN_DEC_SW_CMD_FENCE 0x00000003
+#define VCN_DEC_SW_CMD_TRAP 0x00000004
+#define VCN_DEC_SW_CMD_IB_AUTO 0x00000005
+#define VCN_DEC_SW_CMD_SEMAPHORE 0x00000006
+#define VCN_DEC_SW_CMD_PREEMPT_FENCE 0x00000009
+#define VCN_DEC_SW_CMD_REG_WRITE 0x0000000b
+#define VCN_DEC_SW_CMD_REG_WAIT 0x0000000c
+
+#define VCN_ENC_CMD_NO_OP 0x00000000
+#define VCN_ENC_CMD_END 0x00000001
+#define VCN_ENC_CMD_IB 0x00000002
+#define VCN_ENC_CMD_FENCE 0x00000003
+#define VCN_ENC_CMD_TRAP 0x00000004
+#define VCN_ENC_CMD_REG_WRITE 0x0000000b
+#define VCN_ENC_CMD_REG_WAIT 0x0000000c
+
+#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
+#define VCN_VID_IP_ADDRESS_2_0 0x0
+#define VCN_AON_IP_ADDRESS_2_0 0x30000
+
+#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
+#define mmUVD_REG_XX_MASK 0x026c
+#define mmUVD_REG_XX_MASK_BASE_IDX 1
+
+/* 1 second timeout */
+#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+#define RREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, mask, sram_sel) \
+ ({ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ RREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC15_DPG_MODE_1_0(ip, inst_idx, reg, value, mask, sram_sel) \
+ do { \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst_idx, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
+#define SOC15_DPG_MODE_OFFSET(ip, inst_idx, reg) \
+ ({ \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
+ bool video_range, video1_range, aon_range, aon1_range; \
+ \
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
+ addr <<= 2; \
+ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \
+ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS_3_0)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS_3_0 + 0x2600))))); \
+ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \
+ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS_3_0)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS_3_0 + 0x600))))); \
+ if (video_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \
+ (VCN_VID_IP_ADDRESS_2_0)); \
+ else if (aon_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \
+ (VCN_AON_IP_ADDRESS_2_0)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS_3_0) + \
+ (VCN_VID_IP_ADDRESS_2_0)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS_3_0) + \
+ (VCN_AON_IP_ADDRESS_2_0)); \
+ else \
+ internal_reg_offset = (0xFFFFF & addr); \
+ \
+ internal_reg_offset >>= 2; \
+ })
+
+#define RREG32_SOC15_DPG_MODE(inst_idx, offset, mask_en) \
+ ({ \
+ WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ mmUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \
+ ({ \
+ /* To avoid a -Wunused-but-set-variable warning. */ \
+ uint32_t internal_reg_offset __maybe_unused, addr; \
+ bool video_range, video1_range, aon_range, aon1_range; \
+ \
+ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
+ addr <<= 2; \
+ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \
+ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \
+ if (video_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
+ else \
+ internal_reg_offset = (0xFFFFF & addr); \
+ \
+ internal_reg_offset >>= 2; \
+ })
+
+#define WREG32_SOC24_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \
+ regUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15( \
+ VCN, GET_INST(VCN, inst_idx), \
+ regUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ offset; \
+ *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \
+ value; \
+ } \
+ } while (0)
+
+#define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2)
+#define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4)
+#define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6)
+#define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8)
+#define AMDGPU_VCN_SW_RING_FLAG (1 << 9)
+#define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10)
+#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11)
+#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14)
+#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15)
+
+#define MAX_NUM_VCN_RB_SETUP 4
+
+#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001
+#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001
+
+#define VCN_CODEC_DISABLE_MASK_AV1 (1 << 0)
+#define VCN_CODEC_DISABLE_MASK_VP9 (1 << 1)
+#define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2)
+#define VCN_CODEC_DISABLE_MASK_H264 (1 << 3)
+
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU (0)
+#define AMDGPU_VCN_SMU_DPM_INTERFACE_APU (1)
+
+#define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2
+
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_vcn_caps {
+ AMDGPU_VCN_RRMT_ENABLED,
+};
+
+#define AMDGPU_VCN_CAPS(caps) BIT(AMDGPU_VCN_##caps)
+
+enum fw_queue_mode {
+ FW_QUEUE_RING_RESET = 1,
+ FW_QUEUE_DPG_HOLD_OFF = 2,
+};
+
+enum engine_status_constants {
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0,
+ UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0 = 0x2A2A8AA0,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
+ UVD_STATUS__UVD_BUSY = 0x00000004,
+ GB_ADDR_CONFIG_DEFAULT = 0x26010011,
+ UVD_STATUS__IDLE = 0x2,
+ UVD_STATUS__BUSY = 0x5,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
+ UVD_STATUS__RBC_BUSY = 0x1,
+ UVD_PGFSM_STATUS_UVDJ_PWR_ON = 0,
+};
+
+enum internal_dpg_state {
+ VCN_DPG_STATE__UNPAUSE = 0,
+ VCN_DPG_STATE__PAUSE,
+};
+
+struct dpg_pause_state {
+ enum internal_dpg_state fw_based;
+ enum internal_dpg_state jpeg;
+};
+
+struct amdgpu_vcn_reg {
+ unsigned data0;
+ unsigned data1;
+ unsigned cmd;
+ unsigned nop;
+ unsigned context_id;
+ unsigned ib_vmid;
+ unsigned ib_bar_low;
+ unsigned ib_bar_high;
+ unsigned ib_size;
+ unsigned gp_scratch8;
+ unsigned scratch9;
+};
+
+struct amdgpu_vcn_fw_shared {
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ uint32_t mem_size;
+ uint32_t log_offset;
+};
+
+struct amdgpu_vcn_inst {
+ struct amdgpu_device *adev;
+ int inst;
+ struct amdgpu_bo *vcpu_bo;
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ void *saved_bo;
+ struct amdgpu_ring ring_dec;
+ struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
+ atomic_t sched_score;
+ struct amdgpu_irq_src irq;
+ struct amdgpu_irq_src ras_poison_irq;
+ struct amdgpu_vcn_reg external;
+ struct amdgpu_bo *dpg_sram_bo;
+ struct dpg_pause_state pause_state;
+ void *dpg_sram_cpu_addr;
+ uint64_t dpg_sram_gpu_addr;
+ uint32_t *dpg_sram_curr_addr;
+ atomic_t dpg_enc_submission_cnt;
+ struct amdgpu_vcn_fw_shared fw_shared;
+ uint8_t aid_id;
+ const struct firmware *fw; /* VCN firmware */
+ uint8_t vcn_config;
+ uint32_t vcn_codec_disable_mask;
+ atomic_t total_submission_cnt;
+ struct mutex vcn_pg_lock;
+ enum amd_powergating_state cur_state;
+ struct delayed_work idle_work;
+ unsigned fw_version;
+ unsigned num_enc_rings;
+ bool indirect_sram;
+ struct amdgpu_vcn_reg internal;
+ struct mutex vcn1_jpeg1_workaround;
+ int (*pause_dpg_mode)(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+ int (*set_pg_state)(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+ int (*reset)(struct amdgpu_vcn_inst *vinst);
+ bool using_unified_queue;
+ struct mutex engine_reset_mutex;
+};
+
+struct amdgpu_vcn_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
+struct amdgpu_vcn {
+ uint8_t num_vcn_inst;
+ struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
+
+ unsigned harvest_config;
+
+ struct ras_common_if *ras_if;
+ struct amdgpu_vcn_ras *ras;
+
+ uint16_t inst_mask;
+ uint8_t num_inst_per_aid;
+
+ /* IP reg dump */
+ uint32_t *ip_dump;
+
+ uint32_t supported_reset;
+ uint32_t caps;
+
+ bool per_inst_fw;
+ unsigned fw_version;
+
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
+};
+
+struct amdgpu_fw_shared_rb_ptrs_struct {
+ /* to work around DPG R/W ptr issues. */
+ uint32_t rptr;
+ uint32_t wptr;
+};
+
+struct amdgpu_fw_shared_multi_queue {
+ uint8_t decode_queue_mode;
+ uint8_t encode_generalpurpose_queue_mode;
+ uint8_t encode_lowlatency_queue_mode;
+ uint8_t encode_realtime_queue_mode;
+ uint8_t padding[4];
+};
+
+struct amdgpu_fw_shared_sw_ring {
+ uint8_t is_enabled;
+ uint8_t padding[3];
+};
+
+struct amdgpu_fw_shared_unified_queue_struct {
+ uint8_t is_enabled;
+ uint8_t queue_mode;
+ uint8_t queue_status;
+ uint8_t padding[5];
+};
+
+struct amdgpu_fw_shared_fw_logging {
+ uint8_t is_enabled;
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t size;
+};
+
+struct amdgpu_fw_shared_smu_interface_info {
+ uint8_t smu_interface_type;
+ uint8_t padding[3];
+};
+
+struct amdgpu_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[44];
+ struct amdgpu_fw_shared_rb_ptrs_struct rb;
+ uint8_t pad1[1];
+ struct amdgpu_fw_shared_multi_queue multi_queue;
+ struct amdgpu_fw_shared_sw_ring sw_ring;
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
+};
+
+struct amdgpu_vcn_rb_setup_info {
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+};
+
+struct amdgpu_fw_shared_rb_setup {
+ uint32_t is_rb_enabled_flags;
+
+ union {
+ struct {
+ uint32_t rb_addr_lo;
+ uint32_t rb_addr_hi;
+ uint32_t rb_size;
+ uint32_t rb4_addr_lo;
+ uint32_t rb4_addr_hi;
+ uint32_t rb4_size;
+ uint32_t reserved[6];
+ };
+
+ struct {
+ struct amdgpu_vcn_rb_setup_info rb_info[MAX_NUM_VCN_RB_SETUP];
+ };
+ };
+};
+
+struct amdgpu_fw_shared_drm_key_wa {
+ uint8_t method;
+ uint8_t reserved[3];
+};
+
+struct amdgpu_fw_shared_queue_decouple {
+ uint8_t is_enabled;
+ uint8_t reserved[7];
+};
+
+struct amdgpu_vcn4_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[9];
+ struct amdgpu_fw_shared_queue_decouple decouple;
+};
+
+struct amdgpu_vcn_fwlog {
+ uint32_t rptr;
+ uint32_t wptr;
+ uint32_t buffer_size;
+ uint32_t header_size;
+ uint8_t wrapped;
+};
+
+struct amdgpu_vcn_decode_buffer {
+ uint32_t valid_buf_flag;
+ uint32_t msg_buffer_address_hi;
+ uint32_t msg_buffer_address_lo;
+ uint32_t pad[30];
+};
+
+struct amdgpu_vcn_rb_metadata {
+ uint32_t size;
+ uint32_t present_flag_0;
+
+ uint8_t version;
+ uint8_t ring_id;
+ uint8_t pad[26];
+};
+
+struct amdgpu_vcn5_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ uint8_t pad2[20];
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface;
+ struct amdgpu_fw_shared_drm_key_wa drm_key_wa;
+ uint8_t pad3[404];
+};
+
+#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
+#define VCN_BLOCK_DECODE_DISABLE_MASK 0x40
+#define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0
+
+enum vcn_ring_type {
+ VCN_ENCODE_RING,
+ VCN_DECODE_RING,
+ VCN_UNIFIED_RING,
+};
+
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i);
+void amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i);
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
+
+bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev,
+ enum vcn_ring_type type, uint32_t vcn_instance);
+
+int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring);
+
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i);
+
+void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
+void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
+ uint8_t i, struct amdgpu_vcn_inst *vcn);
+
+int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry);
+int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev,
+ struct ras_common_if *ras_block);
+int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
+
+int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
+ enum AMDGPU_UCODE_ID ucode_id);
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev);
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev);
+
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *guilty_fence);
+int amdgpu_vcn_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_vcn_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_vcn_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
+void amdgpu_vcn_get_profile(struct amdgpu_device *adev);
+void amdgpu_vcn_put_profile(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
new file mode 100644
index 000000000000..f9d3d79f68b1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_vf_error.h"
+#include "mxgpu_ai.h"
+
+void amdgpu_vf_error_put(struct amdgpu_device *adev,
+ uint16_t sub_error_code,
+ uint16_t error_flags,
+ uint64_t error_data)
+{
+ int index;
+ uint16_t error_code;
+
+ if (!amdgpu_sriov_vf(adev))
+ return;
+
+ error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code);
+
+ mutex_lock(&adev->virt.vf_errors.lock);
+ index = adev->virt.vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+ adev->virt.vf_errors.code[index] = error_code;
+ adev->virt.vf_errors.flags[index] = error_flags;
+ adev->virt.vf_errors.data[index] = error_data;
+ adev->virt.vf_errors.write_count++;
+ mutex_unlock(&adev->virt.vf_errors.lock);
+}
+
+
+void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
+{
+ /* u32 pf2vf_flags = 0; */
+ u32 data1, data2, data3;
+ int index;
+
+ if (!adev || !amdgpu_sriov_vf(adev) ||
+ !adev->virt.ops || !adev->virt.ops->trans_msg) {
+ return;
+ }
+/*
+ TODO: Enable this code when pf2vf_info is merged
+ AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags, &pf2vf_flags);
+ if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT)) {
+ return;
+ }
+*/
+
+ mutex_lock(&adev->virt.vf_errors.lock);
+ /* The error log overlays a fixed-size array; if it has overflowed, advance read_count so only the most recent entries are read. */
+ if (adev->virt.vf_errors.write_count - adev->virt.vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) {
+ adev->virt.vf_errors.read_count = adev->virt.vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
+ }
+
+ while (adev->virt.vf_errors.read_count < adev->virt.vf_errors.write_count) {
+ index = adev->virt.vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE;
+ data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(adev->virt.vf_errors.code[index],
+ adev->virt.vf_errors.flags[index]);
+ data2 = adev->virt.vf_errors.data[index] & 0xFFFFFFFF;
+ data3 = (adev->virt.vf_errors.data[index] >> 32) & 0xFFFFFFFF;
+
+ adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3);
+ adev->virt.vf_errors.read_count++;
+ }
+ mutex_unlock(&adev->virt.vf_errors.lock);
+}
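To make the mailbox packing used by amdgpu_vf_error_trans_all() concrete, here is a small stand-alone sketch of the encoding done by AMDGIM_ERROR_CODE() and AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX() (both defined in amdgpu_vf_error.h below); the sample category, sub-code and flag values are illustrative only.

	/* Stand-alone illustration of the VF error encoding: a 4-bit category and
	 * a 12-bit sub-code form a 16-bit error code, which is then packed with
	 * 16 bits of flags into one 32-bit mailbox word. Values are examples. */
	#include <stdint.h>
	#include <stdio.h>

	#define AMDGIM_ERROR_CODE(t, c)			  ((((t) & 0xF) << 12) | ((c) & 0xFFF))
	#define AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(c, f) ((((c) & 0xFFFF) << 16) | ((f) & 0xFFFF))

	int main(void)
	{
		uint16_t code = AMDGIM_ERROR_CODE(3 /* AMDGIM_ERROR_CATEGORY_VF */,
						  9 /* AMDGIM_ERROR_VF_GPU_RESET_FAIL */);
		uint32_t word = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(code, 0x0001);

		printf("code=0x%04x mailbox=0x%08x\n", code, word); /* 0x3009, 0x30090001 */
		return 0;
	}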
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
new file mode 100644
index 000000000000..6436bd053325
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VF_ERROR_H__
+#define __VF_ERROR_H__
+
+#define AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(c,f) (((c & 0xFFFF) << 16) | (f & 0xFFFF))
+#define AMDGIM_ERROR_CODE(t,c) (((t&0xF)<<12)|(c&0xFFF))
+
+/* Please keep enum same as AMD GIM driver */
+enum AMDGIM_ERROR_VF {
+ AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL = 0,
+ AMDGIM_ERROR_VF_NO_VBIOS,
+ AMDGIM_ERROR_VF_GPU_POST_ERROR,
+ AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL,
+ AMDGIM_ERROR_VF_FENCE_INIT_FAIL,
+
+ AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL,
+ AMDGIM_ERROR_VF_IB_INIT_FAIL,
+ AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL,
+ AMDGIM_ERROR_VF_ASIC_RESUME_FAIL,
+ AMDGIM_ERROR_VF_GPU_RESET_FAIL,
+
+ AMDGIM_ERROR_VF_TEST,
+ AMDGIM_ERROR_VF_MAX
+};
+
+enum AMDGIM_ERROR_CATEGORY {
+ AMDGIM_ERROR_CATEGORY_NON_USED = 0,
+ AMDGIM_ERROR_CATEGORY_GIM,
+ AMDGIM_ERROR_CATEGORY_PF,
+ AMDGIM_ERROR_CATEGORY_VF,
+ AMDGIM_ERROR_CATEGORY_VBIOS,
+ AMDGIM_ERROR_CATEGORY_MONITOR,
+
+ AMDGIM_ERROR_CATEGORY_MAX
+};
+
+void amdgpu_vf_error_put(struct amdgpu_device *adev,
+ uint16_t sub_error_code,
+ uint16_t error_flags,
+ uint64_t error_data);
+void amdgpu_vf_error_trans_all(struct amdgpu_device *adev);
+
+#endif /* __VF_ERROR_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
new file mode 100644
index 000000000000..47a6ce4fdc74
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -0,0 +1,1847 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
+
+#include <drm/drm_drv.h>
+#include <xen/xen.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_dpm.h"
+#include "vi.h"
+#include "soc15.h"
+#include "nv.h"
+
+#define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \
+ do { \
+ vf2pf_info->ucode_info[ucode].id = ucode; \
+ vf2pf_info->ucode_info[ucode].version = ver; \
+ } while (0)
+
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+
+const char *amdgpu_virt_dynamic_crit_table_name[] = {
+ "IP DISCOVERY",
+ "VBIOS IMG",
+ "RAS TELEMETRY",
+ "DATA EXCHANGE",
+ "BAD PAGE INFO",
+ "INIT HEADER",
+ "LAST",
+};
+
+bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
+{
+ /*
+ * By now all MMIO pages except the mailbox are blocked if blocking
+ * is enabled in the hypervisor. Use SCRATCH_REG0 to test.
+ */
+ return RREG32_NO_KIQ(0xc040) == 0xffffffff;
+}
+
+void amdgpu_virt_init_setting(struct amdgpu_device *adev)
+{
+ struct drm_device *ddev = adev_to_drm(adev);
+
+ /* enable virtual display */
+ if (adev->asic_type != CHIP_ALDEBARAN &&
+ adev->asic_type != CHIP_ARCTURUS &&
+ ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
+ if (adev->mode_info.num_crtc == 0)
+ adev->mode_info.num_crtc = 1;
+ adev->enable_virtual_display = true;
+ }
+ ddev->driver_features &= ~DRIVER_ATOMIC;
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+
+ /* Reduce kcq number to 2 to reduce latency */
+ if (amdgpu_num_kcq == -1)
+ amdgpu_num_kcq = 2;
+}
+
+/**
+ * amdgpu_virt_request_full_gpu() - request full gpu access
+ * @adev: amdgpu device.
+ * @init: true if called at driver init time.
+ * When starting driver init/fini, full GPU access must be requested first.
+ * Return: Zero on success, otherwise an error code.
+ */
+int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r;
+
+ if (virt->ops && virt->ops->req_full_gpu) {
+ r = virt->ops->req_full_gpu(adev, init);
+ if (r) {
+ adev->no_hw_access = true;
+ return r;
+ }
+
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_virt_release_full_gpu() - release full gpu access
+ * @adev: amdgpu device.
+ * @init: true if called at driver init time.
+ * When finishing driver init/fini, full GPU access needs to be released.
+ * Return: Zero on success, otherwise an error code.
+ */
+int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r;
+
+ if (virt->ops && virt->ops->rel_full_gpu) {
+ r = virt->ops->rel_full_gpu(adev, init);
+ if (r)
+ return r;
+
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_virt_reset_gpu() - reset gpu
+ * @adev: amdgpu device.
+ * Send a reset command to the GPU hypervisor to reset the GPU that the VM is using.
+ * Return: Zero on success, otherwise an error code.
+ */
+int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r;
+
+ if (virt->ops && virt->ops->reset_gpu) {
+ r = virt->ops->reset_gpu(adev);
+ if (r)
+ return r;
+
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ }
+
+ return 0;
+}
+
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->req_init_data)
+ virt->ops->req_init_data(adev);
+
+ if (adev->virt.req_init_data_ver > 0)
+ dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
+ adev->virt.req_init_data_ver);
+ else
+ dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
+}
+
+/**
+ * amdgpu_virt_ready_to_reset() - send ready to reset to host
+ * @adev: amdgpu device.
+ * Send a ready-to-reset message to the GPU hypervisor to signal that we have
+ * stopped GPU activity and are ready for the host FLR.
+ */
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->ready_to_reset)
+ virt->ops->ready_to_reset(adev);
+}
+
+/**
+ * amdgpu_virt_wait_reset() - wait for gpu reset to complete
+ * @adev: amdgpu device.
+ * Wait for the GPU reset to complete.
+ * Return: Zero if reset success, otherwise will return error.
+ */
+int amdgpu_virt_wait_reset(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (!virt->ops || !virt->ops->wait_reset)
+ return -EINVAL;
+
+ return virt->ops->wait_reset(adev);
+}
+
+/**
+ * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
+ * @adev: amdgpu device.
+ * The MM table is used by UVD and VCE for their initialization.
+ * Return: Zero on successful allocation.
+ */
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->virt.mm_table.bo,
+ &adev->virt.mm_table.gpu_addr,
+ (void *)&adev->virt.mm_table.cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
+ return r;
+ }
+
+ memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
+ dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
+ adev->virt.mm_table.gpu_addr,
+ adev->virt.mm_table.cpu_addr);
+ return 0;
+}
+
+/**
+ * amdgpu_virt_free_mm_table() - free mm table memory
+ * @adev: amdgpu device.
+ * Free MM table memory
+ */
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
+ return;
+
+ amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
+ &adev->virt.mm_table.gpu_addr,
+ (void *)&adev->virt.mm_table.cpu_addr);
+ adev->virt.mm_table.gpu_addr = 0;
+}
+
+/**
+ * amdgpu_virt_rcvd_ras_interrupt() - receive ras interrupt
+ * @adev: amdgpu device.
+ * Check whether host sent RAS error message
+ * Return: true if found, otherwise false
+ */
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (!virt->ops || !virt->ops->rcvd_ras_intr)
+ return false;
+
+ return virt->ops->rcvd_ras_intr(adev);
+}
+
+
+unsigned int amd_sriov_msg_checksum(void *obj,
+ unsigned long obj_size,
+ unsigned int key,
+ unsigned int checksum)
+{
+ unsigned int ret = key;
+ unsigned long i = 0;
+ unsigned char *pos;
+
+ pos = (unsigned char *)obj;
+ /* calculate checksum */
+ for (i = 0; i < obj_size; ++i)
+ ret += *(pos + i);
+ /* minus the checksum itself */
+ pos = (unsigned char *)&checksum;
+ for (i = 0; i < sizeof(checksum); ++i)
+ ret -= *(pos + i);
+ return ret;
+}
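A short, self-contained illustration of how this byte-wise checksum is meant to be used: the producer stores the sum over the structure (plus a key) while the checksum field is still zero, and the consumer recomputes the sum, subtracting out the stored checksum bytes, exactly as amdgpu_virt_read_pf2vf_data() does further down. The demo_msg struct and key value are hypothetical.

	/* Minimal model of the pf2vf-style checksum: sum all bytes of the
	 * object, then subtract the bytes of the checksum value itself so the
	 * stored field does not influence the result. Hypothetical struct. */
	#include <stdint.h>
	#include <stdio.h>

	struct demo_msg {
		uint32_t payload[4];
		uint32_t checksum;
	};

	static unsigned int demo_checksum(const void *obj, unsigned long size,
					  unsigned int key, unsigned int checksum)
	{
		const unsigned char *pos = obj;
		unsigned int ret = key;
		unsigned long i;

		for (i = 0; i < size; ++i)
			ret += pos[i];
		pos = (const unsigned char *)&checksum;
		for (i = 0; i < sizeof(checksum); ++i)
			ret -= pos[i];
		return ret;
	}

	int main(void)
	{
		struct demo_msg msg = { .payload = { 1, 2, 3, 4 } };

		/* Producer side: checksum over the message with the field still 0. */
		msg.checksum = demo_checksum(&msg, sizeof(msg), 0x55 /* key */, 0);
		/* Consumer side: recompute, passing the stored checksum so its own
		 * bytes are subtracted out, and compare. */
		printf("%s\n", demo_checksum(&msg, sizeof(msg), 0x55, msg.checksum) ==
			       msg.checksum ? "match" : "mismatch");
		return 0;
	}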
+
+static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
+ /* GPU will be marked bad on host if the bp count is more than 10,
+ * so alloc 512 is enough.
+ */
+ unsigned int align_space = 512;
+ void *bps = NULL;
+ struct amdgpu_bo **bps_bo = NULL;
+
+ *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
+ if (!*data)
+ goto data_failure;
+
+ bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL);
+ if (!bps)
+ goto bps_failure;
+
+ bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
+ if (!bps_bo)
+ goto bps_bo_failure;
+
+ (*data)->bps = bps;
+ (*data)->bps_bo = bps_bo;
+ (*data)->count = 0;
+ (*data)->last_reserved = 0;
+
+ virt->ras_init_done = true;
+
+ return 0;
+
+bps_bo_failure:
+ kfree(bps);
+bps_failure:
+ kfree(*data);
+data_failure:
+ return -ENOMEM;
+}
+
+static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_bo *bo;
+ int i;
+
+ if (!data)
+ return;
+
+ for (i = data->last_reserved - 1; i >= 0; i--) {
+ bo = data->bps_bo[i];
+ if (bo) {
+ amdgpu_bo_free_kernel(&bo, NULL, NULL);
+ data->bps_bo[i] = bo;
+ }
+ data->last_reserved = i;
+ }
+}
+
+void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+
+ virt->ras_init_done = false;
+
+ if (!data)
+ return;
+
+ amdgpu_virt_ras_release_bp(adev);
+
+ kfree(data->bps);
+ kfree(data->bps_bo);
+ kfree(data);
+ virt->virt_eh_data = NULL;
+}
+
+static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+
+ if (!data)
+ return;
+
+ memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
+ data->count += pages;
+}
+
+static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ struct amdgpu_bo *bo = NULL;
+ uint64_t bp;
+ int i;
+
+ if (!data)
+ return;
+
+ for (i = data->last_reserved; i < data->count; i++) {
+ bp = data->bps[i].retired_page;
+
+ /* There are two cases where a reserve error should be ignored:
+ * 1) a ras bad page has been allocated (used by someone);
+ * 2) a ras bad page has been reserved (duplicate error injection
+ * for one page);
+ */
+ if (ttm_resource_manager_used(man)) {
+ amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
+ bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE);
+ data->bps_bo[i] = NULL;
+ } else {
+ if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_GPU_PAGE_SIZE,
+ &bo, NULL))
+ dev_dbg(adev->dev,
+ "RAS WARN: reserve vram for retired page %llx fail\n",
+ bp);
+ data->bps_bo[i] = bo;
+ }
+ data->last_reserved = i + 1;
+ bo = NULL;
+ }
+}
+
+static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev,
+ uint64_t retired_page)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+ int i;
+
+ if (!data)
+ return true;
+
+ for (i = 0; i < data->count; i++)
+ if (retired_page == data->bps[i].retired_page)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
+ uint64_t bp_block_offset, uint32_t bp_block_size)
+{
+ struct eeprom_table_record bp;
+ uint64_t retired_page;
+ uint32_t bp_idx, bp_cnt;
+ void *vram_usage_va = NULL;
+
+ if (adev->mman.fw_vram_usage_va)
+ vram_usage_va = adev->mman.fw_vram_usage_va;
+ else
+ vram_usage_va = adev->mman.drv_vram_usage_va;
+
+ memset(&bp, 0, sizeof(bp));
+
+ if (bp_block_size) {
+ bp_cnt = bp_block_size / sizeof(uint64_t);
+ for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
+ retired_page = *(uint64_t *)(vram_usage_va +
+ bp_block_offset + bp_idx * sizeof(uint64_t));
+ bp.retired_page = retired_page;
+
+ if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
+ continue;
+
+ amdgpu_virt_ras_add_bps(adev, &bp, 1);
+
+ amdgpu_virt_ras_reserve_bps(adev);
+ }
+ }
+}
+
+static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_pf2vf_info_header *pf2vf_info = adev->virt.fw_reserve.p_pf2vf;
+ uint32_t checksum;
+ uint32_t checkval;
+
+ uint32_t i;
+ uint32_t tmp;
+
+ if (adev->virt.fw_reserve.p_pf2vf == NULL)
+ return -EINVAL;
+
+ if (pf2vf_info->size > 1024) {
+ dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size);
+ return -EINVAL;
+ }
+
+ switch (pf2vf_info->version) {
+ case 1:
+ checksum = ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->checksum;
+ checkval = amd_sriov_msg_checksum(
+ adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
+ adev->virt.fw_reserve.checksum_key, checksum);
+ if (checksum != checkval) {
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
+ return -EINVAL;
+ }
+
+ adev->virt.gim_feature =
+ ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->feature_flags;
+ break;
+ case 2:
+ /* TODO: missing key, need to add it later */
+ checksum = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->checksum;
+ checkval = amd_sriov_msg_checksum(
+ adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
+ 0, checksum);
+ if (checksum != checkval) {
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
+ return -EINVAL;
+ }
+
+ adev->virt.vf2pf_update_interval_ms =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->vf2pf_update_interval_ms;
+ adev->virt.gim_feature =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->feature_flags.all;
+ adev->virt.reg_access =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->reg_access_flags.all;
+
+ adev->virt.decode_max_dimension_pixels = 0;
+ adev->virt.decode_max_frame_pixels = 0;
+ adev->virt.encode_max_dimension_pixels = 0;
+ adev->virt.encode_max_frame_pixels = 0;
+ adev->virt.is_mm_bw_enabled = false;
+ for (i = 0; i < AMD_SRIOV_MSG_RESERVE_VCN_INST; i++) {
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_dimension_pixels;
+ adev->virt.decode_max_dimension_pixels = max(tmp, adev->virt.decode_max_dimension_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_frame_pixels;
+ adev->virt.decode_max_frame_pixels = max(tmp, adev->virt.decode_max_frame_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_dimension_pixels;
+ adev->virt.encode_max_dimension_pixels = max(tmp, adev->virt.encode_max_dimension_pixels);
+
+ tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels;
+ adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels);
+ }
+ if ((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
+ adev->virt.is_mm_bw_enabled = true;
+
+ adev->unique_id =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
+ adev->virt.ras_en_caps.all = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_en_caps.all;
+ adev->virt.ras_telemetry_en_caps.all =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_telemetry_en_caps.all;
+ break;
+ default:
+ dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version);
+ return -EINVAL;
+ }
+
+ /* correct too large or too little interval value */
+ if (adev->virt.vf2pf_update_interval_ms < 200 || adev->virt.vf2pf_update_interval_ms > 10000)
+ adev->virt.vf2pf_update_interval_ms = 2000;
+
+ return 0;
+}
+
+static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_vf2pf_info *vf2pf_info;
+ vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
+
+ if (adev->virt.fw_reserve.p_vf2pf == NULL)
+ return;
+
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCE, adev->vce.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_UVD, adev->uvd.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MC, adev->gmc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ME, adev->gfx.me_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_PFP, adev->gfx.pfp_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_CE, adev->gfx.ce_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC, adev->gfx.rlc_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLC, adev->gfx.rlc_srlc_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLG, adev->gfx.rlc_srlg_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
+ adev->psp.asd_context.bin_desc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS,
+ adev->psp.ras_context.context.bin_desc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI,
+ adev->psp.xgmi_context.context.bin_desc.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCN, adev->vcn.fw_version);
+ POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_DMCU, adev->dm.dmcu_fw_version);
+}
+
+static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_vf2pf_info *vf2pf_info;
+
+ vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
+
+ if (adev->virt.fw_reserve.p_vf2pf == NULL)
+ return -EINVAL;
+
+ memset(vf2pf_info, 0, sizeof(struct amd_sriov_msg_vf2pf_info));
+
+ vf2pf_info->header.size = sizeof(struct amd_sriov_msg_vf2pf_info);
+ vf2pf_info->header.version = AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER;
+
+#ifdef MODULE
+ if (THIS_MODULE->version != NULL)
+ strcpy(vf2pf_info->driver_version, THIS_MODULE->version);
+ else
+#endif
+ strcpy(vf2pf_info->driver_version, "N/A");
+
+ vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all
+ vf2pf_info->driver_cert = 0;
+ vf2pf_info->os_info.all = 0;
+
+ vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
+ ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0;
+ vf2pf_info->fb_vis_usage =
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
+ vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
+ vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20;
+
+ amdgpu_virt_populate_vf2pf_ucode_info(adev);
+
+ /* TODO: read dynamic info */
+ vf2pf_info->gfx_usage = 0;
+ vf2pf_info->compute_usage = 0;
+ vf2pf_info->encode_usage = 0;
+ vf2pf_info->decode_usage = 0;
+
+ vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
+ if (amdgpu_sriov_is_mes_info_enable(adev)) {
+ vf2pf_info->mes_info_addr =
+ (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE);
+ vf2pf_info->mes_info_size =
+ adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE;
+ }
+ vf2pf_info->checksum =
+ amd_sriov_msg_checksum(
+ vf2pf_info, sizeof(*vf2pf_info), 0, 0);
+
+ return 0;
+}
+
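+/*
+ * Periodic vf2pf heartbeat: read the pf2vf data, trigger a reset after
+ * repeated failures or a RAS interrupt, otherwise publish fresh vf2pf
+ * data and re-arm the delayed work.
+ */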
+static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work);
+ int ret;
+
+ ret = amdgpu_virt_read_pf2vf_data(adev);
+ if (ret) {
+ adev->virt.vf2pf_update_retry_cnt++;
+
+ if ((amdgpu_virt_rcvd_ras_interrupt(adev) ||
+ adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
+ amdgpu_sriov_runtime(adev)) {
+
+ amdgpu_ras_set_fed(adev, true);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work))
+ return;
+ else
+ dev_err(adev->dev, "Failed to queue work! at %s", __func__);
+ }
+
+ goto out;
+ }
+
+ adev->virt.vf2pf_update_retry_cnt = 0;
+ amdgpu_virt_write_vf2pf_data(adev);
+
+out:
+ schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
+}
+
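+/*
+ * Copy the data exchange block out of VRAM using the offset and size
+ * advertised by the dynamic critical region table.
+ */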
+static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
+{
+ uint32_t dataexchange_offset =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
+ uint32_t dataexchange_size =
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;
+ uint64_t pos = 0;
+
+ dev_info(adev->dev,
+ "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ dataexchange_offset, dataexchange_size);
+
+ if (!IS_ALIGNED(dataexchange_offset, 4) || !IS_ALIGNED(dataexchange_size, 4)) {
+ dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+
+ pos = (uint64_t)dataexchange_offset;
+ amdgpu_device_vram_access(adev, pos, pfvf_data,
+ dataexchange_size, false);
+
+ return 0;
+}
+
+void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
+{
+ if (adev->virt.vf2pf_update_interval_ms != 0) {
+ dev_info(adev->dev, "clean up the vf2pf work item\n");
+ cancel_delayed_work_sync(&adev->virt.vf2pf_work);
+ adev->virt.vf2pf_update_interval_ms = 0;
+ }
+}
+
+void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
+{
+ uint32_t *pfvf_data = NULL;
+
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ adev->virt.fw_reserve.p_vf2pf = NULL;
+ adev->virt.vf2pf_update_interval_ms = 0;
+ adev->virt.vf2pf_update_retry_cnt = 0;
+
+ if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
+ dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
+ } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ /* go through this logic in ip_init and reset to init the workqueue */
+ amdgpu_virt_exchange_data(adev);
+
+ INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
+ schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
+ } else if (adev->bios != NULL) {
+ /* go through this logic in the early init stage to get necessary flags, e.g. rlcg_acc related */
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ pfvf_data =
+ kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
+ GFP_KERNEL);
+ if (!pfvf_data) {
+ dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
+ return;
+ }
+
+ if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
+ goto free_pfvf_data;
+
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
+
+ amdgpu_virt_read_pf2vf_data(adev);
+
+free_pfvf_data:
+ kfree(pfvf_data);
+ pfvf_data = NULL;
+ adev->virt.fw_reserve.p_pf2vf = NULL;
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+
+ amdgpu_virt_read_pf2vf_data(adev);
+ }
+ }
+}
+
+
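+/*
+ * Resolve the pf2vf/vf2pf/ras_telemetry pointers inside the reserved VRAM
+ * region (static v1 offsets or the dynamic v2 critical region table), run
+ * an initial exchange and pick up the host bad page block for v2 headers.
+ */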
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
+{
+ uint64_t bp_block_offset = 0;
+ uint32_t bp_block_size = 0;
+ struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
+
+ if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+ if (adev->mman.fw_vram_usage_va) {
+ if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
+ (AMD_SRIOV_MSG_SIZE_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va +
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
+ } else {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+ }
+ } else if (adev->mman.drv_vram_usage_va) {
+ adev->virt.fw_reserve.p_pf2vf =
+ (struct amd_sriov_msg_pf2vf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.p_vf2pf =
+ (struct amd_sriov_msg_vf2pf_info_header *)
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
+ }
+
+ amdgpu_virt_read_pf2vf_data(adev);
+ amdgpu_virt_write_vf2pf_data(adev);
+
+ /* bad page handling for version 2 */
+ if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
+ pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
+
+ bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
+ ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
+ bp_block_size = pf2vf_v2->bp_block_size;
+
+ if (bp_block_size && !adev->virt.ras_init_done)
+ amdgpu_virt_init_ras_err_handler_data(adev);
+
+ if (adev->virt.ras_init_done)
+ amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+ }
+ }
+}
+
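+/*
+ * Read the per-ASIC IOV function identifier register to detect whether we
+ * run as a VF, whether SR-IOV is enabled, or whether the whole GPU is
+ * passed through to a VM.
+ */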
+static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
+{
+ uint32_t reg;
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+ break;
+ case CHIP_VEGA10:
+ case CHIP_VEGA20:
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
+ case CHIP_IP_DISCOVERY:
+ reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
+ break;
+ default: /* other chip doesn't support SRIOV */
+ reg = 0;
+ break;
+ }
+
+ if (reg & 1)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
+
+ if (reg & 0x80000000)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+
+ if (!reg) {
+ /* passthrough mode excludes sriov mode */
+ if (is_virtual_machine() && !xen_initial_domain())
+ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+ }
+
+ return reg;
+}
+
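+/*
+ * Install the per-ASIC virtualization ops and, where the host expects it,
+ * request the GPU init data. Returns true when running as an SR-IOV VF on
+ * a supported ASIC.
+ */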
+static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
+{
+ bool is_sriov = false;
+
+ /* we have the ability to check now */
+ if (amdgpu_sriov_vf(adev)) {
+ is_sriov = true;
+
+ switch (adev->asic_type) {
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ vi_set_virt_ops(adev);
+ break;
+ case CHIP_VEGA10:
+ soc15_set_virt_ops(adev);
+#ifdef CONFIG_X86
+ /* not send GPU_INIT_DATA with MS_HYPERV*/
+ if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
+#endif
+ /* send a dummy GPU_INIT_DATA request to host on vega10 */
+ amdgpu_virt_request_init_data(adev);
+ break;
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
+ soc15_set_virt_ops(adev);
+ break;
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_IP_DISCOVERY:
+ nv_set_virt_ops(adev);
+ /* try send GPU_INIT_DATA request to host */
+ amdgpu_virt_request_init_data(adev);
+ break;
+ default: /* other chip doesn't support SRIOV */
+ is_sriov = false;
+ dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
+ break;
+ }
+ }
+
+ return is_sriov;
+}
+
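+/*
+ * Set up the rate limits, mutexes and CPER read pointer used by the
+ * SR-IOV RAS telemetry path.
+ */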
+static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
+{
+ ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_chk_criti_rs, 5 * HZ, 1);
+
+ ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_chk_criti_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+
+ mutex_init(&adev->virt.ras.ras_telemetry_mutex);
+ mutex_init(&adev->virt.access_req_mutex);
+
+ adev->virt.ras.cper_rptr = 0;
+}
+
+static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
+{
+ uint32_t sum = 0;
+
+ if (buf_start >= buf_end)
+ return 0;
+
+ for (; buf_start < buf_end; buf_start++)
+ sum += buf_start[0];
+
+ return 0xffffffff - sum;
+}
+
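+/*
+ * Parse and validate the v2 critical region init data header read from
+ * VRAM: check signature and checksum, record each valid table entry and
+ * reserve the region described by the header for the driver.
+ */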
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
+{
+ struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
+ u64 init_hdr_offset = adev->virt.init_data_header.offset;
+ u64 init_hdr_size = (u64)adev->virt.init_data_header.size_kb << 10; /* KB → bytes */
+ u64 vram_size;
+ u64 end;
+ int r = 0;
+ uint8_t checksum = 0;
+
+ /* Skip below init if critical region version != v2 */
+ if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
+ return 0;
+
+ if (init_hdr_offset < 0) {
+ dev_err(adev->dev, "Invalid init header offset\n");
+ return -EINVAL;
+ }
+
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ return -EINVAL;
+ vram_size <<= 20;
+
+ if (check_add_overflow(init_hdr_offset, init_hdr_size, &end) || end > vram_size) {
+ dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
+ return -EINVAL;
+ }
+
+ /* Allocate for init_data_hdr */
+ init_data_hdr = kzalloc(sizeof(struct amd_sriov_msg_init_data_header), GFP_KERNEL);
+ if (!init_data_hdr)
+ return -ENOMEM;
+
+ amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
+ sizeof(struct amd_sriov_msg_init_data_header), false);
+
+ /* Table validation */
+ if (strncmp(init_data_hdr->signature,
+ AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
+ AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
+ dev_err(adev->dev, "Invalid init data signature: %.4s\n",
+ init_data_hdr->signature);
+ r = -EINVAL;
+ goto out;
+ }
+
+ checksum = amdgpu_virt_crit_region_calc_checksum(
+ (uint8_t *)&init_data_hdr->initdata_offset,
+ (uint8_t *)init_data_hdr +
+ sizeof(struct amd_sriov_msg_init_data_header));
+ if (checksum != init_data_hdr->checksum) {
+ dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
+ checksum, init_data_hdr->checksum);
+ r = -EINVAL;
+ goto out;
+ }
+
+ memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn));
+ memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl));
+
+ adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
+ adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
+
+ /* Validation and initialization for each table entry */
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) {
+ if (!init_data_hdr->ip_discovery_size_in_kb ||
+ init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID],
+ init_data_hdr->ip_discovery_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
+ init_data_hdr->ip_discovery_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
+ init_data_hdr->ip_discovery_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) {
+ if (!init_data_hdr->vbios_img_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID],
+ init_data_hdr->vbios_img_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
+ init_data_hdr->vbios_img_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
+ init_data_hdr->vbios_img_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) {
+ if (!init_data_hdr->ras_tele_info_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID],
+ init_data_hdr->ras_tele_info_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
+ init_data_hdr->ras_tele_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
+ init_data_hdr->ras_tele_info_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) {
+ if (!init_data_hdr->dataexchange_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID],
+ init_data_hdr->dataexchange_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
+ init_data_hdr->dataexchange_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
+ init_data_hdr->dataexchange_size_in_kb;
+ }
+
+ if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) {
+ if (!init_data_hdr->bad_page_size_in_kb) {
+ dev_err(adev->dev, "Invalid %s size: 0x%x\n",
+ amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID],
+ init_data_hdr->bad_page_size_in_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
+ init_data_hdr->bad_page_info_offset;
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
+ init_data_hdr->bad_page_size_in_kb;
+ }
+
+ /* Validation for critical region info */
+ if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
+ dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
+ adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* reserved memory starts from crit region base offset with the size of 5MB */
+ adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
+ adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
+ dev_info(adev->dev,
+ "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n",
+ init_data_hdr->version,
+ adev->mman.fw_vram_usage_start_offset,
+ adev->mman.fw_vram_usage_size >> 10);
+
+ adev->virt.is_dynamic_crit_regn_enabled = true;
+
+out:
+ kfree(init_data_hdr);
+ init_data_hdr = NULL;
+
+ return r;
+}
+
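+/*
+ * Copy one dynamic critical region table, identified by data_id, from
+ * VRAM into the caller's buffer and report its size.
+ */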
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size)
+{
+ uint32_t data_offset = 0;
+ uint32_t data_size = 0;
+ enum amd_sriov_msg_table_id_enum data_table_id = data_id;
+
+ if (data_table_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
+ return -EINVAL;
+
+ data_offset = adev->virt.crit_regn_tbl[data_table_id].offset;
+ data_size = adev->virt.crit_regn_tbl[data_table_id].size_kb << 10;
+
+ /* Validate on input params */
+ if (!binary || !size || *size < (uint64_t)data_size)
+ return -EINVAL;
+
+ /* Proceed to copy the dynamic content */
+ amdgpu_device_vram_access(adev,
+ (uint64_t)data_offset, (uint32_t *)binary, data_size, false);
+ *size = (uint64_t)data_size;
+
+ dev_dbg(adev->dev,
+ "Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
+ amdgpu_virt_dynamic_crit_table_name[data_id], data_offset, data_size);
+
+ return 0;
+}
+
+void amdgpu_virt_init(struct amdgpu_device *adev)
+{
+ bool is_sriov = false;
+ uint32_t reg = amdgpu_virt_init_detect_asic(adev);
+
+ is_sriov = amdgpu_virt_init_req_data(adev, reg);
+
+ if (is_sriov)
+ amdgpu_virt_init_ras(adev);
+}
+
+static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
+{
+ return amdgpu_sriov_is_debug(adev);
+}
+
+static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
+{
+ return amdgpu_sriov_is_normal(adev);
+}
+
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev) ||
+ amdgpu_virt_access_debugfs_is_kiq(adev))
+ return 0;
+
+ if (amdgpu_virt_access_debugfs_is_mmio(adev))
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ else
+ return -EPERM;
+
+ return 0;
+}
+
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+}
+
+enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev)
+{
+ enum amdgpu_sriov_vf_mode mode;
+
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ mode = SRIOV_VF_MODE_ONE_VF;
+ else
+ mode = SRIOV_VF_MODE_MULTI_VF;
+ } else {
+ mode = SRIOV_VF_MODE_BARE_METAL;
+ }
+
+ return mode;
+}
+
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
+{
+ /* stop the data exchange thread */
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
+}
+
+void amdgpu_virt_post_reset(struct amdgpu_device *adev)
+{
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
+ /* force set to GFXOFF state after reset,
+ * to avoid some invalid operation before GC enable
+ */
+ adev->gfx.is_poweron = false;
+ }
+
+ adev->mes.ring[0].sched.ready = false;
+}
+
+bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
+{
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 0):
+ /* no vf autoload, white list */
+ if (ucode_id == AMDGPU_UCODE_ID_VCN1 ||
+ ucode_id == AMDGPU_UCODE_ID_VCN)
+ return false;
+ else
+ return true;
+ case IP_VERSION(11, 0, 9):
+ case IP_VERSION(11, 0, 7):
+ /* black list for CHIP_NAVI12 and CHIP_SIENNA_CICHLID */
+ if (ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_SMC)
+ return true;
+ else
+ return false;
+ case IP_VERSION(13, 0, 10):
+ /* white list */
+ if (ucode_id == AMDGPU_UCODE_ID_CAP
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES1
+ || ucode_id == AMDGPU_UCODE_ID_CP_MES1_DATA
+ || ucode_id == AMDGPU_UCODE_ID_VCN1
+ || ucode_id == AMDGPU_UCODE_ID_VCN)
+ return false;
+ else
+ return true;
+ default:
+ /* legacy black list */
+ if (ucode_id == AMDGPU_UCODE_ID_SDMA0
+ || ucode_id == AMDGPU_UCODE_ID_SDMA1
+ || ucode_id == AMDGPU_UCODE_ID_SDMA2
+ || ucode_id == AMDGPU_UCODE_ID_SDMA3
+ || ucode_id == AMDGPU_UCODE_ID_SDMA4
+ || ucode_id == AMDGPU_UCODE_ID_SDMA5
+ || ucode_id == AMDGPU_UCODE_ID_SDMA6
+ || ucode_id == AMDGPU_UCODE_ID_SDMA7
+ || ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
+ || ucode_id == AMDGPU_UCODE_ID_SMC)
+ return true;
+ else
+ return false;
+ }
+}
+
+void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
+ struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
+ struct amdgpu_video_codec_info *decode, uint32_t decode_array_size)
+{
+ uint32_t i;
+
+ if (!adev->virt.is_mm_bw_enabled)
+ return;
+
+ if (encode) {
+ for (i = 0; i < encode_array_size; i++) {
+ encode[i].max_width = adev->virt.encode_max_dimension_pixels;
+ encode[i].max_pixels_per_frame = adev->virt.encode_max_frame_pixels;
+ if (encode[i].max_width > 0)
+ encode[i].max_height = encode[i].max_pixels_per_frame / encode[i].max_width;
+ else
+ encode[i].max_height = 0;
+ }
+ }
+
+ if (decode) {
+ for (i = 0; i < decode_array_size; i++) {
+ decode[i].max_width = adev->virt.decode_max_dimension_pixels;
+ decode[i].max_pixels_per_frame = adev->virt.decode_max_frame_pixels;
+ if (decode[i].max_width > 0)
+ decode[i].max_height = decode[i].max_pixels_per_frame / decode[i].max_width;
+ else
+ decode[i].max_height = 0;
+ }
+ }
+}
+
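+/*
+ * Decide whether a register access has to go through the RLCG indirect
+ * interface and, if so, which command flag to use.
+ */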
+bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+ u32 acc_flags, u32 hwip,
+ bool write, u32 *rlcg_flag)
+{
+ bool ret = false;
+
+ switch (hwip) {
+ case GC_HWIP:
+ if (amdgpu_sriov_reg_indirect_gc(adev)) {
+ *rlcg_flag =
+ write ? AMDGPU_RLCG_GC_WRITE : AMDGPU_RLCG_GC_READ;
+ ret = true;
+ /* only in new version, AMDGPU_REGS_NO_KIQ and
+ * AMDGPU_REGS_RLC are enabled simultaneously */
+ } else if ((acc_flags & AMDGPU_REGS_RLC) &&
+ !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) {
+ *rlcg_flag = AMDGPU_RLCG_GC_WRITE_LEGACY;
+ ret = true;
+ }
+ break;
+ case MMHUB_HWIP:
+ if (amdgpu_sriov_reg_indirect_mmhub(adev) &&
+ (acc_flags & AMDGPU_REGS_RLC) && write) {
+ *rlcg_flag = AMDGPU_RLCG_MMHUB_WRITE;
+ ret = true;
+ }
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
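+/*
+ * Perform one indirect register read/write through the RLCG scratch
+ * register interface; accesses are serialized by rlcg_reg_lock.
+ */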
+u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+ uint32_t timeout = 50000;
+ uint32_t i, tmp;
+ uint32_t ret = 0;
+ void *scratch_reg0;
+ void *scratch_reg1;
+ void *scratch_reg2;
+ void *scratch_reg3;
+ void *spare_int;
+ unsigned long flags;
+
+ if (!adev->gfx.rlc.rlcg_reg_access_supported) {
+ dev_err(adev->dev,
+ "indirect registers access through rlcg is not available\n");
+ return 0;
+ }
+
+ if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
+ dev_err(adev->dev, "invalid xcc\n");
+ return 0;
+ }
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
+ scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0;
+ scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
+ scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
+ scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
+
+ spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
+
+ if (reg_access_ctrl->spare_int)
+ spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
+
+ if (offset == reg_access_ctrl->grbm_cntl) {
+ /* if the target reg offset is grbm_cntl, write to scratch_reg2 */
+ writel(v, scratch_reg2);
+ if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
+ writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+ } else if (offset == reg_access_ctrl->grbm_idx) {
+ /* if the target reg offset is grbm_idx, write to scratch_reg3 */
+ writel(v, scratch_reg3);
+ if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
+ writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
+ } else {
+ /*
+ * SCRATCH_REG0 = read/write value
+ * SCRATCH_REG1[30:28] = command
+ * SCRATCH_REG1[19:0] = address in dword
+ * SCRATCH_REG1[27:24] = Error reporting
+ */
+ writel(v, scratch_reg0);
+ writel((offset | flag), scratch_reg1);
+ if (reg_access_ctrl->spare_int)
+ writel(1, spare_int);
+
+ for (i = 0; i < timeout; i++) {
+ tmp = readl(scratch_reg1);
+ if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK))
+ break;
+ udelay(10);
+ }
+
+ tmp = readl(scratch_reg1);
+ if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) {
+ if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
+ if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
+ dev_err(adev->dev,
+ "vfgate is disabled, rlcg failed to program reg: 0x%05x\n", offset);
+ } else if (tmp & AMDGPU_RLCG_WRONG_OPERATION_TYPE) {
+ dev_err(adev->dev,
+ "wrong operation type, rlcg failed to program reg: 0x%05x\n", offset);
+ } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
+ dev_err(adev->dev,
+ "register is not in range, rlcg failed to program reg: 0x%05x\n", offset);
+ } else {
+ dev_err(adev->dev,
+ "unknown error type, rlcg failed to program reg: 0x%05x\n", offset);
+ }
+ } else {
+ dev_err(adev->dev,
+ "timeout: rlcg faled to program reg: 0x%05x\n", offset);
+ }
+ }
+ }
+
+ ret = readl(scratch_reg0);
+
+ spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);
+
+ return ret;
+}
+
+void amdgpu_sriov_wreg(struct amdgpu_device *adev,
+ u32 offset, u32 value,
+ u32 acc_flags, u32 hwip, u32 xcc_id)
+{
+ u32 rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ if (!amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) {
+ amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
+ return;
+ }
+
+ if (acc_flags & AMDGPU_REGS_NO_KIQ)
+ WREG32_NO_KIQ(offset, value);
+ else
+ WREG32(offset, value);
+}
+
+u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
+ u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id)
+{
+ u32 rlcg_flag;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ if (!amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag))
+ return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id);
+
+ if (acc_flags & AMDGPU_REGS_NO_KIQ)
+ return RREG32_NO_KIQ(offset);
+ else
+ return RREG32(offset);
+}
+
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
+{
+ bool xnack_mode = true;
+
+ if (amdgpu_sriov_vf(adev) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ xnack_mode = false;
+
+ return xnack_mode;
+}
+
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_sriov_ras_caps_en(adev))
+ return false;
+
+ if (adev->virt.ras_en_caps.bits.block_umc)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC);
+ if (adev->virt.ras_en_caps.bits.block_sdma)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA);
+ if (adev->virt.ras_en_caps.bits.block_gfx)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX);
+ if (adev->virt.ras_en_caps.bits.block_mmhub)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB);
+ if (adev->virt.ras_en_caps.bits.block_athub)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB);
+ if (adev->virt.ras_en_caps.bits.block_pcie_bif)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF);
+ if (adev->virt.ras_en_caps.bits.block_hdp)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP);
+ if (adev->virt.ras_en_caps.bits.block_xgmi_wafl)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL);
+ if (adev->virt.ras_en_caps.bits.block_df)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF);
+ if (adev->virt.ras_en_caps.bits.block_smn)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN);
+ if (adev->virt.ras_en_caps.bits.block_sem)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM);
+ if (adev->virt.ras_en_caps.bits.block_mp0)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0);
+ if (adev->virt.ras_en_caps.bits.block_mp1)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1);
+ if (adev->virt.ras_en_caps.bits.block_fuse)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE);
+ if (adev->virt.ras_en_caps.bits.block_mca)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MCA);
+ if (adev->virt.ras_en_caps.bits.block_vcn)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN);
+ if (adev->virt.ras_en_caps.bits.block_jpeg)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG);
+ if (adev->virt.ras_en_caps.bits.block_ih)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH);
+ if (adev->virt.ras_en_caps.bits.block_mpio)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO);
+
+ if (adev->virt.ras_en_caps.bits.poison_propogation_mode)
+ con->poison_supported = true; /* Poison is handled by host */
+
+ return true;
+}
+
+static inline enum amd_sriov_ras_telemetry_gpu_block
+amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block) {
+ switch (block) {
+ case AMDGPU_RAS_BLOCK__UMC:
+ return RAS_TELEMETRY_GPU_BLOCK_UMC;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ return RAS_TELEMETRY_GPU_BLOCK_SDMA;
+ case AMDGPU_RAS_BLOCK__GFX:
+ return RAS_TELEMETRY_GPU_BLOCK_GFX;
+ case AMDGPU_RAS_BLOCK__MMHUB:
+ return RAS_TELEMETRY_GPU_BLOCK_MMHUB;
+ case AMDGPU_RAS_BLOCK__ATHUB:
+ return RAS_TELEMETRY_GPU_BLOCK_ATHUB;
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ return RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF;
+ case AMDGPU_RAS_BLOCK__HDP:
+ return RAS_TELEMETRY_GPU_BLOCK_HDP;
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ return RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL;
+ case AMDGPU_RAS_BLOCK__DF:
+ return RAS_TELEMETRY_GPU_BLOCK_DF;
+ case AMDGPU_RAS_BLOCK__SMN:
+ return RAS_TELEMETRY_GPU_BLOCK_SMN;
+ case AMDGPU_RAS_BLOCK__SEM:
+ return RAS_TELEMETRY_GPU_BLOCK_SEM;
+ case AMDGPU_RAS_BLOCK__MP0:
+ return RAS_TELEMETRY_GPU_BLOCK_MP0;
+ case AMDGPU_RAS_BLOCK__MP1:
+ return RAS_TELEMETRY_GPU_BLOCK_MP1;
+ case AMDGPU_RAS_BLOCK__FUSE:
+ return RAS_TELEMETRY_GPU_BLOCK_FUSE;
+ case AMDGPU_RAS_BLOCK__MCA:
+ return RAS_TELEMETRY_GPU_BLOCK_MCA;
+ case AMDGPU_RAS_BLOCK__VCN:
+ return RAS_TELEMETRY_GPU_BLOCK_VCN;
+ case AMDGPU_RAS_BLOCK__JPEG:
+ return RAS_TELEMETRY_GPU_BLOCK_JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return RAS_TELEMETRY_GPU_BLOCK_IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return RAS_TELEMETRY_GPU_BLOCK_MPIO;
+ default:
+ dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
+ block);
+ return RAS_TELEMETRY_GPU_BLOCK_COUNT;
+ }
+}
+
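+/*
+ * Validate the checksum of the host RAS error count telemetry and cache
+ * the counts locally for later queries.
+ */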
+static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry)
+{
+ struct amd_sriov_ras_telemetry_error_count *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ memcpy(&adev->virt.count_cache, tmp,
+ min(used_size, sizeof(adev->virt.count_cache)));
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bool force_update)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (!virt->ops || !virt->ops->req_ras_err_count)
+ return -EOPNOTSUPP;
+
+ /* The host allows 15 ras telemetry requests per 60 seconds. After that,
+ * the host will ignore incoming guest messages. Ratelimit the guest
+ * messages to prevent the guest from DOSing itself.
+ */
+ if (__ratelimit(&virt->ras.ras_error_cnt_rs) || force_update) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_err_count(adev))
+ amdgpu_virt_cache_host_error_counts(adev,
+ virt->fw_reserve.ras_telemetry);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return 0;
+}
+
+/* Bypass ACA interface and query ECC counts directly from host */
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data)
+{
+ enum amd_sriov_ras_telemetry_gpu_block sriov_block;
+
+ sriov_block = amdgpu_ras_block_to_sriov(adev, block);
+
+ if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
+ !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
+ return -EOPNOTSUPP;
+
+ /* Host Access may be lost during reset, just return last cached data. */
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_req_ras_err_count_internal(adev, false);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ err_data->ue_count = adev->virt.count_cache.block[sriov_block].ue_count;
+ err_data->ce_count = adev->virt.count_cache.block[sriov_block].ce_count;
+ err_data->de_count = adev->virt.count_cache.block[sriov_block].de_count;
+
+ return 0;
+}
+
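+/*
+ * Validate a host CPER dump and append each record to the guest CPER
+ * ring buffer, advancing the cached read pointer.
+ */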
+static int
+amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ u32 *more)
+{
+ struct amd_sriov_ras_cper_dump *cper_dump = NULL;
+ struct cper_hdr *entry = NULL;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ uint32_t checksum, used_size, i;
+ int ret = 0;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+ return -EINVAL;
+
+ cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
+ if (!cper_dump)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *more = cper_dump->more;
+
+ if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
+ dev_warn(
+ adev->dev,
+ "guest specified rptr that was too high! guest rptr: 0x%llx, host rptr: 0x%llx\n",
+ adev->virt.ras.cper_rptr, cper_dump->wptr);
+
+ adev->virt.ras.cper_rptr = cper_dump->wptr;
+ goto out;
+ }
+
+ entry = (struct cper_hdr *)&cper_dump->buf[0];
+
+ for (i = 0; i < cper_dump->count; i++) {
+ amdgpu_cper_ring_write(ring, entry, entry->record_length);
+ entry = (struct cper_hdr *)((char *)entry +
+ entry->record_length);
+ }
+
+ if (cper_dump->overflow_count)
+ dev_warn(adev->dev,
+ "host reported CPER overflow of 0x%llx entries!\n",
+ cper_dump->overflow_count);
+
+ adev->virt.ras.cper_rptr = cper_dump->wptr;
+out:
+ kfree(cper_dump);
+
+ return ret;
+}
+
+static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int ret = 0;
+ uint32_t more = 0;
+
+ if (!virt->ops || !virt->ops->req_ras_cper_dump)
+ return -EOPNOTSUPP;
+
+ do {
+ if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
+ ret = amdgpu_virt_write_cpers_to_ring(
+ adev, virt->fw_reserve.ras_telemetry, &more);
+ else
+ ret = 0;
+ } while (more && !ret);
+
+ return ret;
+}
+
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int ret = 0;
+
+ if (!amdgpu_sriov_ras_cper_en(adev))
+ return -EOPNOTSUPP;
+
+ if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ return ret;
+}
+
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
+{
+ unsigned long ue_count, ce_count;
+
+ if (amdgpu_sriov_ras_telemetry_en(adev)) {
+ amdgpu_virt_req_ras_err_count_internal(adev, true);
+ amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL);
+ }
+
+ return 0;
+}
+
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ enum amd_sriov_ras_telemetry_gpu_block sriov_block;
+
+ sriov_block = amdgpu_ras_block_to_sriov(adev, block);
+
+ if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
+ !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
+ return false;
+
+ return true;
+}
+
+/*
+ * amdgpu_virt_request_bad_pages() - request bad pages
+ * @adev: amdgpu device.
+ * Send command to GPU hypervisor to write new bad pages into the shared PF2VF region
+ */
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->req_bad_pages)
+ virt->ops->req_bad_pages(adev);
+}
+
+static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ bool *hit)
+{
+ struct amd_sriov_ras_chk_criti *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ if (hit)
+ *hit = tmp->hit ? true : false;
+
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int r = -EPERM;
+
+ if (!virt->ops || !virt->ops->req_ras_chk_criti)
+ return -EOPNOTSUPP;
+
+ /* The host allows 15 ras telemetry requests per 60 seconds. After that,
+ * the host will ignore incoming guest messages. Ratelimit the guest
+ * messages to prevent the guest from DOSing itself.
+ */
+ if (__ratelimit(&virt->ras.ras_chk_criti_rs)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_chk_criti(adev, addr))
+ r = amdgpu_virt_cache_chk_criti_hit(
+ adev, virt->fw_reserve.ras_telemetry, hit);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
new file mode 100644
index 000000000000..01d5bca2dee1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -0,0 +1,486 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Monk.liu@amd.com
+ */
+#ifndef AMDGPU_VIRT_H
+#define AMDGPU_VIRT_H
+
+#include "amdgv_sriovmsg.h"
+
+#define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS (1 << 0) /* vBIOS is sr-iov ready */
+#define AMDGPU_SRIOV_CAPS_ENABLE_IOV (1 << 1) /* sr-iov is enabled on this GPU */
+#define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */
+#define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* the whole GPU is passed through to the VM */
+#define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */
+#define AMDGPU_VF_MMIO_ACCESS_PROTECT (1 << 5) /* MMIO write access is not allowed in sriov runtime */
+
+/* flags for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28)
+#define AMDGPU_RLCG_GC_WRITE (0x0 << 28)
+#define AMDGPU_RLCG_GC_READ (0x1 << 28)
+#define AMDGPU_RLCG_MMHUB_WRITE (0x2 << 28)
+
+/* error code for indirect register access path supported by rlcg for sriov */
+#define AMDGPU_RLCG_VFGATE_DISABLED 0x4000000
+#define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000
+#define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000
+
+#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF
+#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000
+
+/* all ASICs after AI use this offset */
+#define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
+/* tonga/fiji use this offset */
+#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
+
+#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 2
+
+/* Signature used to validate the SR-IOV dynamic critical region init data header ("INDA") */
+#define AMDGPU_SRIOV_CRIT_DATA_SIGNATURE "INDA"
+#define AMDGPU_SRIOV_CRIT_DATA_SIG_LEN 4
+
+#define IS_SRIOV_CRIT_REGN_ENTRY_VALID(hdr, id) ((hdr)->valid_tables & (1 << (id)))
+
+enum amdgpu_sriov_vf_mode {
+ SRIOV_VF_MODE_BARE_METAL = 0,
+ SRIOV_VF_MODE_ONE_VF,
+ SRIOV_VF_MODE_MULTI_VF,
+};
+
+struct amdgpu_mm_table {
+ struct amdgpu_bo *bo;
+ uint32_t *cpu_addr;
+ uint64_t gpu_addr;
+};
+
+#define AMDGPU_VF_ERROR_ENTRY_SIZE 16
+
+/* struct amdgpu_vf_error_buffer - amdgpu VF error information. */
+struct amdgpu_vf_error_buffer {
+ struct mutex lock;
+ int read_count;
+ int write_count;
+ uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
+ uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
+ uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
+};
+
+enum idh_request;
+
+/**
+ * struct amdgpu_virt_ops - amdgpu device virt operations
+ */
+struct amdgpu_virt_ops {
+ int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
+ int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
+ int (*req_init_data)(struct amdgpu_device *adev);
+ int (*reset_gpu)(struct amdgpu_device *adev);
+ void (*ready_to_reset)(struct amdgpu_device *adev);
+ int (*wait_reset)(struct amdgpu_device *adev);
+ void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
+ u32 data1, u32 data2, u32 data3);
+ void (*ras_poison_handler)(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+ bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
+ int (*req_ras_err_count)(struct amdgpu_device *adev);
+ int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
+ int (*req_bad_pages)(struct amdgpu_device *adev);
+ int (*req_ras_chk_criti)(struct amdgpu_device *adev, u64 addr);
+};
+
+/*
+ * Firmware Reserve Frame buffer
+ */
+struct amdgpu_virt_fw_reserve {
+ struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
+ struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
+ void *ras_telemetry;
+ unsigned int checksum_key;
+};
+
+/*
+ * Legacy GIM header
+ *
+ * Definitions shared between PF and VF.
+ * Structures forcibly aligned to 4 to keep the same style as PF.
+ */
+#define AMDGIM_DATAEXCHANGE_OFFSET (64 * 1024)
+
+#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \
+ (total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2))
+
+enum AMDGIM_FEATURE_FLAG {
+ /* GIM supports feature of Error log collecting */
+ AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1,
+ /* GIM supports feature of loading uCodes */
+ AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2,
+ /* VRAM LOST by GIM */
+ AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
+ /* MM bandwidth */
+ AMDGIM_FEATURE_GIM_MM_BW_MGR = 0x8,
+ /* PP ONE VF MODE in GIM */
+ AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
+ /* Indirect Reg Access enabled */
+ AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
+ /* AV1 support mode */
+ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
+ /* VCN RB decouple */
+ AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
+ /* MES info */
+ AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
+ AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
+ AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
+ AMDGIM_FEATURE_RAS_CPER = (1 << 11),
+ AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK = (1 << 12),
+};
+
+enum AMDGIM_REG_ACCESS_FLAG {
+ /* Use PSP to program IH_RB_CNTL */
+ AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
+ /* Use RLC to program MMHUB regs */
+ AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
+ /* Use RLC to program GC regs */
+ AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+ /* Use PSP to program L1_TLB_CNTL */
+ AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3),
+ /* Use RLCG to program SQ_CONFIG1 */
+ AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4),
+};
+
+struct amdgim_pf2vf_info_v1 {
+ /* header contains size and version */
+ struct amd_sriov_msg_pf2vf_info_header header;
+ /* max_width * max_height */
+ unsigned int uvd_enc_max_pixels_count;
+ /* 16x16 pixels/sec, codec independent */
+ unsigned int uvd_enc_max_bandwidth;
+ /* max_width * max_height */
+ unsigned int vce_enc_max_pixels_count;
+ /* 16x16 pixels/sec, codec independent */
+ unsigned int vce_enc_max_bandwidth;
+ /* MEC FW position in kb from the start of visible frame buffer */
+ unsigned int mecfw_kboffset;
+ /* The feature flags that the GIM driver supports. */
+ unsigned int feature_flags;
+ /* use the private key from mailbox 2 to create the checksum */
+ unsigned int checksum;
+} __aligned(4);
+
+struct amdgim_vf2pf_info_v1 {
+ /* header contains size and version */
+ struct amd_sriov_msg_vf2pf_info_header header;
+ /* driver version */
+ char driver_version[64];
+ /* driver certification, 1=WHQL, 0=None */
+ unsigned int driver_cert;
+ /* guest OS type and version: need a define */
+ unsigned int os_info;
+ /* in the unit of 1M */
+ unsigned int fb_usage;
+ /* guest gfx engine usage percentage */
+ unsigned int gfx_usage;
+ /* guest gfx engine health percentage */
+ unsigned int gfx_health;
+ /* guest compute engine usage percentage */
+ unsigned int compute_usage;
+ /* guest compute engine health percentage */
+ unsigned int compute_health;
+ /* guest vce engine usage percentage. 0xffff means N/A. */
+ unsigned int vce_enc_usage;
+ /* guest vce engine health percentage. 0xffff means N/A. */
+ unsigned int vce_enc_health;
+ /* guest uvd engine usage percentage. 0xffff means N/A. */
+ unsigned int uvd_enc_usage;
+ /* guest uvd engine health percentage. 0xffff means N/A. */
+ unsigned int uvd_enc_health;
+ unsigned int checksum;
+} __aligned(4);
+
+struct amdgim_vf2pf_info_v2 {
+ /* header contains size and version */
+ struct amd_sriov_msg_vf2pf_info_header header;
+ uint32_t checksum;
+ /* driver version */
+ uint8_t driver_version[64];
+ /* driver certification, 1=WHQL, 0=None */
+ uint32_t driver_cert;
+ /* guest OS type and version: need a define */
+ uint32_t os_info;
+ /* in the unit of 1M */
+ uint32_t fb_usage;
+ /* guest gfx engine usage percentage */
+ uint32_t gfx_usage;
+ /* guest gfx engine health percentage */
+ uint32_t gfx_health;
+ /* guest compute engine usage percentage */
+ uint32_t compute_usage;
+ /* guest compute engine health percentage */
+ uint32_t compute_health;
+ /* guest vce engine usage percentage. 0xffff means N/A. */
+ uint32_t vce_enc_usage;
+ /* guest vce engine health percentage. 0xffff means N/A. */
+ uint32_t vce_enc_health;
+ /* guest uvd engine usage percentage. 0xffff means N/A. */
+ uint32_t uvd_enc_usage;
+ /* guest uvd engine health percentage. 0xffff means N/A. */
+ uint32_t uvd_enc_health;
+ uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)];
+} __aligned(4);
+
+struct amdgpu_virt_ras_err_handler_data {
+ /* point to bad page records array */
+ struct eeprom_table_record *bps;
+ /* point to reserved bo array */
+ struct amdgpu_bo **bps_bo;
+ /* the count of entries */
+ int count;
+ /* last reserved entry's index + 1 */
+ int last_reserved;
+};
+
+struct amdgpu_virt_ras {
+ struct ratelimit_state ras_error_cnt_rs;
+ struct ratelimit_state ras_cper_dump_rs;
+ struct ratelimit_state ras_chk_criti_rs;
+ struct mutex ras_telemetry_mutex;
+ uint64_t cper_rptr;
+};
+
+#define AMDGPU_VIRT_CAPS_LIST(X) X(AMDGPU_VIRT_CAP_POWER_LIMIT)
+
+DECLARE_ATTR_CAP_CLASS(amdgpu_virt, AMDGPU_VIRT_CAPS_LIST);
+
+struct amdgpu_virt_region {
+ uint32_t offset;
+ uint32_t size_kb;
+};
+
+/* GPU virtualization */
+struct amdgpu_virt {
+ uint32_t caps;
+ struct amdgpu_bo *csa_obj;
+ void *csa_cpu_addr;
+ bool chained_ib_support;
+ uint32_t reg_val_offs;
+ struct amdgpu_irq_src ack_irq;
+ struct amdgpu_irq_src rcv_irq;
+
+ struct work_struct flr_work;
+ struct work_struct req_bad_pages_work;
+ struct work_struct handle_bad_pages_work;
+
+ struct amdgpu_mm_table mm_table;
+ const struct amdgpu_virt_ops *ops;
+ struct amdgpu_vf_error_buffer vf_errors;
+ struct amdgpu_virt_fw_reserve fw_reserve;
+ struct amdgpu_virt_caps virt_caps;
+ uint32_t gim_feature;
+ uint32_t reg_access_mode;
+ int req_init_data_ver;
+ bool tdr_debug;
+ struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
+ bool ras_init_done;
+ uint32_t reg_access;
+
+ /* dynamic(v2) critical regions */
+ struct amdgpu_virt_region init_data_header;
+ struct amdgpu_virt_region crit_regn;
+ struct amdgpu_virt_region crit_regn_tbl[AMD_SRIOV_MSG_MAX_TABLE_ID];
+ bool is_dynamic_crit_regn_enabled;
+
+ /* vf2pf message */
+ struct delayed_work vf2pf_work;
+ uint32_t vf2pf_update_interval_ms;
+ int vf2pf_update_retry_cnt;
+
+ /* multimedia bandwidth config */
+ bool is_mm_bw_enabled;
+ uint32_t decode_max_dimension_pixels;
+ uint32_t decode_max_frame_pixels;
+ uint32_t encode_max_dimension_pixels;
+ uint32_t encode_max_frame_pixels;
+
+ /* the ucode id to signal the autoload */
+ uint32_t autoload_ucode_id;
+
+ /* Spinlock to protect access to the RLCG register interface */
+ spinlock_t rlcg_reg_lock;
+
+ struct mutex access_req_mutex;
+
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+ struct amdgpu_virt_ras ras;
+ struct amd_sriov_ras_telemetry_error_count count_cache;
+
+ /* hibernate and resume with different VF feature for xgmi enabled system */
+ bool is_xgmi_node_migrate_enabled;
+};
+
+struct amdgpu_video_codec_info;
+
+#define amdgpu_sriov_enabled(adev) \
+((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
+
+#define amdgpu_sriov_vf(adev) \
+((adev)->virt.caps & AMDGPU_SRIOV_CAPS_IS_VF)
+
+#define amdgpu_sriov_bios(adev) \
+((adev)->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)
+
+#define amdgpu_sriov_runtime(adev) \
+((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME)
+
+#define amdgpu_sriov_fullaccess(adev) \
+(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))
+
+#define amdgpu_sriov_reg_indirect_en(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.gim_feature & (AMDGIM_FEATURE_INDIRECT_REG_ACCESS)))
+
+#define amdgpu_sriov_reg_indirect_ih(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_IH_REG_PSP_EN)))
+
+#define amdgpu_sriov_reg_indirect_mmhub(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_MMHUB_REG_RLC_EN)))
+
+#define amdgpu_sriov_reg_indirect_gc(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))
+
+#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN)))
+
+#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
+ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
+
+#define amdgpu_sriov_reg_access_sq_config(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG)))
+
+#define amdgpu_passthrough(adev) \
+((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)
+
+#define amdgpu_sriov_vf_mmio_access_protection(adev) \
+((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)
+
+#define amdgpu_sriov_ras_caps_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)
+
+#define amdgpu_sriov_ras_telemetry_en(adev) \
+(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry)
+
+#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
+(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))
+
+#define amdgpu_sriov_ras_cper_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
+
+#define amdgpu_sriov_xgmi_ta_ext_peer_link_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_XGMI_TA_EXT_PEER_LINK)
+
+static inline bool is_virtual_machine(void)
+{
+#if defined(CONFIG_X86)
+ return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#elif defined(CONFIG_ARM64)
+ return !is_kernel_in_hyp_mode();
+#else
+ return false;
+#endif
+}
+
+#define amdgpu_sriov_is_pp_one_vf(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_multi_vf_mode(adev) \
+ (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+#define amdgpu_sriov_is_debug(adev) \
+ ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
+#define amdgpu_sriov_is_normal(adev) \
+ ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
+#define amdgpu_sriov_is_av1_support(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
+#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
+#define amdgpu_sriov_is_mes_info_enable(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)
+
+#define amdgpu_virt_xgmi_migrate_enabled(adev) \
+ ((adev)->virt.is_xgmi_node_migrate_enabled && (adev)->gmc.xgmi.node_segment_size != 0)
+
+bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
+void amdgpu_virt_init_setting(struct amdgpu_device *adev);
+int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
+int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
+int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
+void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
+int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
+int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
+void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
+bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev);
+void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
+void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
+void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
+void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
+void amdgpu_virt_init(struct amdgpu_device *adev);
+
+int amdgpu_virt_init_critical_region(struct amdgpu_device *adev);
+int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
+ int data_id, uint8_t *binary, u32 *size);
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
+
+enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev);
+
+void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
+ struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
+ struct amdgpu_video_codec_info *decode, uint32_t decode_array_size);
+void amdgpu_sriov_wreg(struct amdgpu_device *adev,
+ u32 offset, u32 value,
+ u32 acc_flags, u32 hwip, u32 xcc_id);
+u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
+ u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id);
+bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
+ uint32_t ucode_id);
+void amdgpu_virt_pre_reset(struct amdgpu_device *adev);
+void amdgpu_virt_post_reset(struct amdgpu_device *adev);
+bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev);
+bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
+ u32 acc_flags, u32 hwip,
+ bool write, u32 *rlcg_flag);
+u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data);
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
+int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
new file mode 100644
index 000000000000..79bad9cbe2ab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -0,0 +1,659 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#ifdef CONFIG_DRM_AMDGPU_SI
+#include "dce_v6_0.h"
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+#include "dce_v8_0.h"
+#endif
+#include "dce_v10_0.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+#include "amdgpu_vkms.h"
+#include "amdgpu_display.h"
+#include "atom.h"
+#include "amdgpu_irq.h"
+
+/**
+ * DOC: amdgpu_vkms
+ *
+ * The amdgpu vkms interface provides a virtual KMS interface for several use
+ * cases: devices without display hardware, platforms where the actual display
+ * hardware is not useful (e.g., servers), SR-IOV virtual functions, device
+ * emulation/simulation, and device bring up prior to display hardware being
+ * usable. We previously emulated a legacy KMS interface, but there was a desire
+ * to move to the atomic KMS interface. The vkms driver did everything we
+ * needed, but we wanted KMS support natively in the driver without buffer
+ * sharing and the ability to support an instance of VKMS per device. We first
+ * looked at splitting vkms into a stub driver and a helper module that other
+ * drivers could use to implement a virtual display, but this strategy ended up
+ * being messy due to driver specific callbacks needed for buffer management.
+ * Ultimately, it proved easier to import the vkms code as it mostly used core
+ * drm helpers anyway.
+ */
+
+static const u32 amdgpu_vkms_formats[] = {
+ DRM_FORMAT_XRGB8888,
+};
+
+static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
+{
+ struct amdgpu_crtc *amdgpu_crtc = container_of(timer, struct amdgpu_crtc, vblank_timer);
+ struct drm_crtc *crtc = &amdgpu_crtc->base;
+ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
+ u64 ret_overrun;
+ bool ret;
+
+ ret_overrun = hrtimer_forward_now(&amdgpu_crtc->vblank_timer,
+ output->period_ns);
+ if (ret_overrun != 1)
+ DRM_WARN("%s: vblank timer overrun\n", __func__);
+
+ ret = drm_crtc_handle_vblank(crtc);
+ /* Don't queue timer again when vblank is disabled. */
+ if (!ret)
+ return HRTIMER_NORESTART;
+
+ return HRTIMER_RESTART;
+}
+
+static int amdgpu_vkms_enable_vblank(struct drm_crtc *crtc)
+{
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
+ struct amdgpu_vkms_output *out = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_calc_timestamping_constants(crtc, &crtc->mode);
+
+ out->period_ns = ktime_set(0, vblank->framedur_ns);
+ hrtimer_start(&amdgpu_crtc->vblank_timer, out->period_ns, HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ hrtimer_try_to_cancel(&amdgpu_crtc->vblank_timer);
+}
+
+static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
+ int *max_error,
+ ktime_t *vblank_time,
+ bool in_vblank_irq)
+{
+ struct amdgpu_vkms_output *output = drm_crtc_to_amdgpu_vkms_output(crtc);
+ struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!READ_ONCE(vblank->enabled)) {
+ *vblank_time = ktime_get();
+ return true;
+ }
+
+ *vblank_time = READ_ONCE(amdgpu_crtc->vblank_timer.node.expires);
+
+ if (WARN_ON(*vblank_time == vblank->time))
+ return true;
+
+ /*
+ * To prevent races we roll the hrtimer forward before we do any
+ * interrupt processing - this is how real hw works (the interrupt is
+ * only generated after all the vblank registers are updated) and what
+ * the vblank core expects. Therefore we need to always correct the
+ * timestamp by one frame.
+ */
+ *vblank_time -= output->period_ns;
+
+ return true;
+}
+
+static const struct drm_crtc_funcs amdgpu_vkms_crtc_funcs = {
+ .set_config = drm_atomic_helper_set_config,
+ .destroy = drm_crtc_cleanup,
+ .page_flip = drm_atomic_helper_page_flip,
+ .reset = drm_atomic_helper_crtc_reset,
+ .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+ .enable_vblank = amdgpu_vkms_enable_vblank,
+ .disable_vblank = amdgpu_vkms_disable_vblank,
+ .get_vblank_timestamp = amdgpu_vkms_get_vblank_timestamp,
+};
+
+static void amdgpu_vkms_crtc_atomic_enable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ drm_crtc_vblank_on(crtc);
+}
+
+static void amdgpu_vkms_crtc_atomic_disable(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ drm_crtc_vblank_off(crtc);
+}
+
+static void amdgpu_vkms_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ unsigned long flags;
+
+ if (crtc->state->event) {
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
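+ /*
+ * If no vblank reference can be obtained, send the event out
+ * immediately; otherwise arm it for the next simulated vblank.
+ */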
+ if (drm_crtc_vblank_get(crtc) != 0)
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+ else
+ drm_crtc_arm_vblank_event(crtc, crtc->state->event);
+
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+ crtc->state->event = NULL;
+ }
+}
+
+static const struct drm_crtc_helper_funcs amdgpu_vkms_crtc_helper_funcs = {
+ .atomic_flush = amdgpu_vkms_crtc_atomic_flush,
+ .atomic_enable = amdgpu_vkms_crtc_atomic_enable,
+ .atomic_disable = amdgpu_vkms_crtc_atomic_disable,
+};
+
+static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
+ struct drm_plane *primary, struct drm_plane *cursor)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ int ret;
+
+ ret = drm_crtc_init_with_planes(dev, crtc, primary, cursor,
+ &amdgpu_vkms_crtc_funcs, NULL);
+ if (ret) {
+ DRM_ERROR("Failed to init CRTC\n");
+ return ret;
+ }
+
+ drm_crtc_helper_add(crtc, &amdgpu_vkms_crtc_helper_funcs);
+
+ amdgpu_crtc->crtc_id = drm_crtc_index(crtc);
+ adev->mode_info.crtcs[drm_crtc_index(crtc)] = amdgpu_crtc;
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
+
+ hrtimer_setup(&amdgpu_crtc->vblank_timer, &amdgpu_vkms_vblank_simulate, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+
+ return ret;
+}
+
+static const struct drm_connector_funcs amdgpu_vkms_connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = drm_connector_cleanup,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector)
+{
+ struct drm_device *dev = connector->dev;
+ struct drm_display_mode *mode = NULL;
+ unsigned i;
+ static const struct mode_size {
+ int w;
+ int h;
+ } common_modes[] = {
+ { 640, 480},
+ { 720, 480},
+ { 800, 600},
+ { 848, 480},
+ {1024, 768},
+ {1152, 768},
+ {1280, 720},
+ {1280, 800},
+ {1280, 854},
+ {1280, 960},
+ {1280, 1024},
+ {1440, 900},
+ {1400, 1050},
+ {1680, 1050},
+ {1600, 1200},
+ {1920, 1080},
+ {1920, 1200},
+ {2560, 1440},
+ {4096, 3112},
+ {3656, 2664},
+ {3840, 2160},
+ {4096, 2160},
+ };
+
+ for (i = 0; i < ARRAY_SIZE(common_modes); i++) {
+ mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false);
+ if (!mode)
+ continue;
+ drm_mode_probed_add(connector, mode);
+ }
+
+ drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF);
+
+ return ARRAY_SIZE(common_modes);
+}
+
+static const struct drm_connector_helper_funcs amdgpu_vkms_conn_helper_funcs = {
+ .get_modes = amdgpu_vkms_conn_get_modes,
+};
+
+static const struct drm_plane_funcs amdgpu_vkms_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ .destroy = drm_plane_cleanup,
+ .reset = drm_atomic_helper_plane_reset,
+ .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+static void amdgpu_vkms_plane_atomic_update(struct drm_plane *plane,
+ struct drm_atomic_state *old_state)
+{
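+ /* Nothing to do: the virtual output does not compose or scan out planes. */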
+ return;
+}
+
+static int amdgpu_vkms_plane_atomic_check(struct drm_plane *plane,
+ struct drm_atomic_state *state)
+{
+ struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
+ plane);
+ struct drm_crtc_state *crtc_state;
+ int ret;
+
+ if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc))
+ return 0;
+
+ crtc_state = drm_atomic_get_crtc_state(state,
+ new_plane_state->crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ ret = drm_atomic_helper_check_plane_state(new_plane_state, crtc_state,
+ DRM_PLANE_NO_SCALING,
+ DRM_PLANE_NO_SCALING,
+ false, true);
+ if (ret != 0)
+ return ret;
+
+ /* for now primary plane must be visible and full screen */
+ if (!new_plane_state->visible)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
+ struct drm_plane_state *new_state)
+{
+ struct amdgpu_framebuffer *afb;
+ struct drm_gem_object *obj;
+ struct amdgpu_device *adev;
+ struct amdgpu_bo *rbo;
+ uint32_t domain;
+ int r;
+
+ if (!new_state->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+ afb = to_amdgpu_framebuffer(new_state->fb);
+
+ obj = drm_gem_fb_get_obj(new_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return -EINVAL;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
+ adev = amdgpu_ttm_adev(rbo->tbo.bdev);
+
+ r = amdgpu_bo_reserve(rbo, true);
+ if (r) {
+ dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
+ return r;
+ }
+
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+ if (r) {
+ dev_err(adev->dev, "allocating fence slot failed (%d)\n", r);
+ goto error_unlock;
+ }
+
+ if (plane->type != DRM_PLANE_TYPE_CURSOR)
+ domain = amdgpu_display_supported_domains(adev, rbo->flags);
+ else
+ domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+ rbo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(rbo, domain);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
+ goto error_unlock;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&rbo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", rbo);
+ goto error_unpin;
+ }
+
+ amdgpu_bo_unreserve(rbo);
+
+ afb->address = amdgpu_bo_gpu_offset(rbo);
+
+ amdgpu_bo_ref(rbo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(rbo);
+
+error_unlock:
+ amdgpu_bo_unreserve(rbo);
+ return r;
+}
+
+static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ struct amdgpu_bo *rbo;
+ struct drm_gem_object *obj;
+ int r;
+
+ if (!old_state->fb)
+ return;
+
+ obj = drm_gem_fb_get_obj(old_state->fb, 0);
+ if (!obj) {
+ DRM_ERROR("Failed to get obj from framebuffer\n");
+ return;
+ }
+
+ rbo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(rbo, false);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve rbo before unpin\n");
+ return;
+ }
+
+ amdgpu_bo_unpin(rbo);
+ amdgpu_bo_unreserve(rbo);
+ amdgpu_bo_unref(&rbo);
+}
+
+static const struct drm_plane_helper_funcs amdgpu_vkms_primary_helper_funcs = {
+ .atomic_update = amdgpu_vkms_plane_atomic_update,
+ .atomic_check = amdgpu_vkms_plane_atomic_check,
+ .prepare_fb = amdgpu_vkms_prepare_fb,
+ .cleanup_fb = amdgpu_vkms_cleanup_fb,
+};
+
+static struct drm_plane *amdgpu_vkms_plane_init(struct drm_device *dev,
+ enum drm_plane_type type,
+ int index)
+{
+ struct drm_plane *plane;
+ int ret;
+
+ plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+ if (!plane)
+ return ERR_PTR(-ENOMEM);
+
+ ret = drm_universal_plane_init(dev, plane, 1 << index,
+ &amdgpu_vkms_plane_funcs,
+ amdgpu_vkms_formats,
+ ARRAY_SIZE(amdgpu_vkms_formats),
+ NULL, type, NULL);
+ if (ret) {
+ kfree(plane);
+ return ERR_PTR(ret);
+ }
+
+ drm_plane_helper_add(plane, &amdgpu_vkms_primary_helper_funcs);
+
+ return plane;
+}
+
+static int amdgpu_vkms_output_init(struct drm_device *dev,
+ struct amdgpu_vkms_output *output, int index)
+{
+ struct drm_connector *connector = &output->connector;
+ struct drm_encoder *encoder = &output->encoder;
+ struct drm_crtc *crtc = &output->crtc.base;
+ struct drm_plane *primary, *cursor = NULL;
+ int ret;
+
+ primary = amdgpu_vkms_plane_init(dev, DRM_PLANE_TYPE_PRIMARY, index);
+ if (IS_ERR(primary))
+ return PTR_ERR(primary);
+
+ ret = amdgpu_vkms_crtc_init(dev, crtc, primary, cursor);
+ if (ret)
+ goto err_crtc;
+
+ ret = drm_connector_init(dev, connector, &amdgpu_vkms_connector_funcs,
+ DRM_MODE_CONNECTOR_VIRTUAL);
+ if (ret) {
+ DRM_ERROR("Failed to init connector\n");
+ goto err_connector;
+ }
+
+ drm_connector_helper_add(connector, &amdgpu_vkms_conn_helper_funcs);
+
+ ret = drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL);
+ if (ret) {
+ DRM_ERROR("Failed to init encoder\n");
+ goto err_encoder;
+ }
+ encoder->possible_crtcs = 1 << index;
+
+ ret = drm_connector_attach_encoder(connector, encoder);
+ if (ret) {
+ DRM_ERROR("Failed to attach connector to encoder\n");
+ goto err_attach;
+ }
+
+ drm_mode_config_reset(dev);
+
+ return 0;
+
+err_attach:
+ drm_encoder_cleanup(encoder);
+
+err_encoder:
+ drm_connector_cleanup(connector);
+
+err_connector:
+ drm_crtc_cleanup(crtc);
+
+err_crtc:
+ drm_plane_cleanup(primary);
+
+ return ret;
+}
+
+const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = {
+ .fb_create = amdgpu_display_user_framebuffer_create,
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = drm_atomic_helper_commit,
+};
+
+static int amdgpu_vkms_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc,
+ sizeof(struct amdgpu_vkms_output), GFP_KERNEL);
+ if (!adev->amdgpu_vkms_output)
+ return -ENOMEM;
+
+ adev_to_drm(adev)->max_vblank_count = 0;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_vkms_mode_funcs;
+
+ adev_to_drm(adev)->mode_config.max_width = XRES_MAX;
+ adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
+
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ /* allocate crtcs, encoders, connectors */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_vkms_output_init(adev_to_drm(adev), &adev->amdgpu_vkms_output[i], i);
+ if (r)
+ return r;
+ }
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
+}
+
+static int amdgpu_vkms_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i = 0;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ if (adev->mode_info.crtcs[i])
+ hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+ drm_mode_config_cleanup(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = false;
+
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
+ kfree(adev->amdgpu_vkms_output);
+ return 0;
+}
+
+static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ dce_v6_0_disable_dce(adev);
+ break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ dce_v8_0_disable_dce(adev);
+ break;
+#endif
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ dce_v10_0_disable_dce(adev);
+ break;
+ case CHIP_TOPAZ:
+#ifdef CONFIG_DRM_AMDGPU_SI
+ case CHIP_HAINAN:
+#endif
+ /* no DCE */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int amdgpu_vkms_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+static int amdgpu_vkms_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = drm_mode_config_helper_suspend(adev_to_drm(adev));
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int amdgpu_vkms_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ r = amdgpu_vkms_hw_init(ip_block);
+ if (r)
+ return r;
+ return drm_mode_config_helper_resume(adev_to_drm(ip_block->adev));
+}
+
+static bool amdgpu_vkms_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
+ .name = "amdgpu_vkms",
+ .sw_init = amdgpu_vkms_sw_init,
+ .sw_fini = amdgpu_vkms_sw_fini,
+ .hw_init = amdgpu_vkms_hw_init,
+ .hw_fini = amdgpu_vkms_hw_fini,
+ .suspend = amdgpu_vkms_suspend,
+ .resume = amdgpu_vkms_resume,
+ .is_idle = amdgpu_vkms_is_idle,
+ .set_clockgating_state = amdgpu_vkms_set_clockgating_state,
+ .set_powergating_state = amdgpu_vkms_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &amdgpu_vkms_ip_funcs,
+};
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
new file mode 100644
index 000000000000..4f8722ff37c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _AMDGPU_VKMS_H_
+#define _AMDGPU_VKMS_H_
+
+#define XRES_DEF 1024
+#define YRES_DEF 768
+
+#define XRES_MAX 16384
+#define YRES_MAX 16384
+
+#define drm_crtc_to_amdgpu_vkms_output(target) \
+ container_of(target, struct amdgpu_vkms_output, crtc.base)
+
+extern const struct amdgpu_ip_block_version amdgpu_vkms_ip_block;
+
+struct amdgpu_vkms_output {
+ struct amdgpu_crtc crtc;
+ struct drm_encoder encoder;
+ struct drm_connector connector;
+ ktime_t period_ns;
+ struct drm_pending_vblank_event *event;
+};
+
+#endif /* _AMDGPU_VKMS_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
new file mode 100644
index 000000000000..a67285118c37
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -0,0 +1,3209 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include <linux/dma-fence-array.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/idr.h>
+#include <linux/dma-buf.h>
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_dma_buf.h"
+#include "amdgpu_res_cursor.h"
+#include "kfd_svm.h"
+
+/**
+ * DOC: GPUVM
+ *
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time. The GPUVM page tables can contain a mix of
+ * VRAM pages and system pages (both memory and MMIO) and system pages
+ * can be mapped as snooped (cached system pages) or unsnooped
+ * (uncached system pages).
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID. When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
+ * buffer. VMIDs are allocated dynamically as commands are submitted.
+ * The userspace drivers maintain their own address space and the kernel
+ * sets up their page tables accordingly when they submit their
+ * command buffers and a VMID is assigned.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family. GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special. It is the GPUVM used for the kernel driver. In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures. There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present). These apertures provide direct access to these memories without
+ * incurring the overhead of a page table. VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have their own unique GPUVM
+ * address space. The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process. If a GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
+ */
+
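+/*
+ * As a rough sketch (assuming the common configuration of 4 KiB GPU pages
+ * and 9-bit page table blocks; the exact split is ASIC dependent), a GPU
+ * virtual address is decomposed into page table indices like this:
+ *
+ *	page_offset = addr & ((1ULL << 12) - 1);
+ *	pte_index   = (addr >> 12) & 0x1ff;
+ *	pde0_index  = (addr >> 21) & 0x1ff;
+ *	pde1_index  = (addr >> 30) & 0x1ff;
+ *	pde2_index  = (addr >> 39) & 0x1ff;
+ *
+ * Each level of the walk narrows the range until the final PTE supplies the
+ * backing page and the per-page attributes described above.
+ */
+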
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
+ START, LAST, static, amdgpu_vm_it)
+
+#undef START
+#undef LAST
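+
+/*
+ * INTERVAL_TREE_DEFINE() above generates the static helpers
+ * amdgpu_vm_it_insert(), amdgpu_vm_it_remove(), amdgpu_vm_it_iter_first()
+ * and amdgpu_vm_it_iter_next(). A lookup of all mappings overlapping a
+ * [start, last] range would look roughly like this, with handle_overlap()
+ * standing in for whatever the caller needs to do with each mapping:
+ *
+ *	struct amdgpu_bo_va_mapping *m;
+ *
+ *	for (m = amdgpu_vm_it_iter_first(&vm->va, start, last); m;
+ *	     m = amdgpu_vm_it_iter_next(m, start, last))
+ *		handle_overlap(m);
+ */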
+
+/**
+ * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
+ */
+struct amdgpu_prt_cb {
+
+ /**
+ * @adev: amdgpu device
+ */
+ struct amdgpu_device *adev;
+
+ /**
+ * @cb: callback
+ */
+ struct dma_fence_cb cb;
+};
+
+/**
+ * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
+ */
+struct amdgpu_vm_tlb_seq_struct {
+ /**
+ * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
+ */
+ struct amdgpu_vm *vm;
+
+ /**
+ * @cb: callback
+ */
+ struct dma_fence_cb cb;
+};
+
+/**
+ * amdgpu_vm_assert_locked - check if VM is correctly locked
+ * @vm: the VM which should be tested
+ *
+ * Asserts that the VM root PD is locked.
+ */
+static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
+{
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+}
+
+/**
+ * amdgpu_vm_bo_evicted - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for PDs/PTs and per VM BOs which are not at the location they should
+ * be.
+ */
+static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+{
+ struct amdgpu_vm *vm = vm_bo->vm;
+ struct amdgpu_bo *bo = vm_bo->bo;
+
+ vm_bo->moved = true;
+ amdgpu_vm_assert_locked(vm);
+ spin_lock(&vm_bo->vm->status_lock);
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ list_move(&vm_bo->vm_status, &vm->evicted);
+ else
+ list_move_tail(&vm_bo->vm_status, &vm->evicted);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_moved - vm_bo is moved
+ *
+ * @vm_bo: vm_bo which is moved
+ *
+ * State for per VM BOs which are moved, but that change is not yet reflected
+ * in the page tables.
+ */
+static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
+{
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_idle - vm_bo is idle
+ *
+ * @vm_bo: vm_bo which is now idle
+ *
+ * State for PDs/PTs and per VM BOs which have gone through the state machine
+ * and are now idle.
+ */
+static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
+{
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+ spin_unlock(&vm_bo->vm->status_lock);
+ vm_bo->moved = false;
+}
+
+/**
+ * amdgpu_vm_bo_invalidated - vm_bo is invalidated
+ *
+ * @vm_bo: vm_bo which is now invalidated
+ *
+ * State for normal BOs which are invalidated and whose change is not yet reflected
+ * in the PTs.
+ */
+static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
+{
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_evicted_user - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for BOs used by user mode queues which are not at the location they
+ * should be.
+ */
+static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
+{
+ vm_bo->moved = true;
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_relocated - vm_bo is relocated
+ *
+ * @vm_bo: vm_bo which is relocated
+ *
+ * State for PDs/PTs which need to update their parent PD.
+ * For the root PD, just move to idle state.
+ */
+static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
+{
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ if (vm_bo->bo->parent) {
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+ spin_unlock(&vm_bo->vm->status_lock);
+ } else {
+ amdgpu_vm_bo_idle(vm_bo);
+ }
+}
+
+/**
+ * amdgpu_vm_bo_done - vm_bo is done
+ *
+ * @vm_bo: vm_bo which is now done
+ *
+ * State for normal BOs which are invalidated and whose change has been updated
+ * in the PTs.
+ */
+static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
+{
+ amdgpu_vm_assert_locked(vm_bo->vm);
+ spin_lock(&vm_bo->vm->status_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->done);
+ spin_unlock(&vm_bo->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
+ * @vm: the VM which state machine to reset
+ *
+ * Move all vm_bo objects in the VM into a state where they will be updated
+ * again during validation.
+ */
+static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_bo_base *vm_bo, *tmp;
+
+ amdgpu_vm_assert_locked(vm);
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->done, &vm->invalidated);
+ list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
+ vm_bo->moved = true;
+
+ list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
+ struct amdgpu_bo *bo = vm_bo->bo;
+
+ vm_bo->moved = true;
+ if (!bo || bo->tbo.type != ttm_bo_type_kernel)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ else if (bo->parent)
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+ }
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_update_shared - helper to update shared memory stat
+ * @base: base structure for tracking BO usage in a VM
+ *
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats needs to be called
+ * as well.
+ */
+static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ uint64_t size = amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+ bool shared;
+
+ dma_resv_assert_held(bo->tbo.base.resv);
+ spin_lock(&vm->status_lock);
+ shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ if (base->shared != shared) {
+ base->shared = shared;
+ if (shared) {
+ vm->stats[bo_memtype].drm.shared += size;
+ vm->stats[bo_memtype].drm.private -= size;
+ } else {
+ vm->stats[bo_memtype].drm.shared -= size;
+ vm->stats[bo_memtype].drm.private += size;
+ }
+ }
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
+ * @bo: amdgpu buffer object
+ *
+ * Update the per-VM stats for all the VMs the BO belongs to when it changes
+ * from private to shared or vice versa.
+ */
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *base;
+
+ for (base = bo->vm_bo; base; base = base->next)
+ amdgpu_vm_update_shared(base);
+}
+
+/**
+ * amdgpu_vm_update_stats_locked - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Caller needs to have the vm status_lock held. Useful when multiple updates
+ * need to happen at the same time.
+ */
+static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ int64_t size = sign * amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+
+ /* For drm-total- and drm-shared-, BOs are accounted by their preferred
+ * placement, see also amdgpu_bo_mem_stats_placement.
+ */
+ if (base->shared)
+ vm->stats[bo_memtype].drm.shared += size;
+ else
+ vm->stats[bo_memtype].drm.private += size;
+
+ if (res && res->mem_type < __AMDGPU_PL_NUM) {
+ uint32_t res_memtype = res->mem_type;
+
+ vm->stats[res_memtype].drm.resident += size;
+ /* A BO only counts as purgeable if it is resident,
+ * since otherwise there's nothing to purge.
+ */
+ if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
+ vm->stats[res_memtype].drm.purgeable += size;
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+ vm->stats[bo_memtype].evicted += size;
+ }
+}
+
+/**
+ * amdgpu_vm_update_stats - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Updates the basic memory stat when bo is added/deleted/moved.
+ */
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
+{
+ struct amdgpu_vm *vm = base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(base, res, sign);
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
+ *
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
+ *
+ * Initialize a bo_va_base structure and add it to the appropriate lists
+ *
+ */
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo)
+{
+ base->vm = vm;
+ base->bo = bo;
+ base->next = NULL;
+ INIT_LIST_HEAD(&base->vm_status);
+
+ if (!bo)
+ return;
+ base->next = bo->vm_bo;
+ bo->vm_bo = base;
+
+ spin_lock(&vm->status_lock);
+ base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
+ spin_unlock(&vm->status_lock);
+
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
+ return;
+
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+
+ ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
+ if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
+ amdgpu_vm_bo_relocated(base);
+ else
+ amdgpu_vm_bo_idle(base);
+
+ if (bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
+ return;
+
+ /*
+ * We checked all the prerequisites, but it looks like this per-VM BO
+ * is currently evicted. Add the BO to the evicted list to make sure it
+ * is validated on next VM use to avoid a fault.
+ */
+ amdgpu_vm_bo_evicted(base);
+}
+
+/**
+ * amdgpu_vm_lock_pd - lock PD in drm_exec
+ *
+ * @vm: vm providing the BOs
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
+ *
+ * Lock the VM root PD in the DRM execution context.
+ */
+int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ /* We need at least two fences for the VM PD/PT updates */
+ return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
+ 2 + num_fences);
+}
+
+/**
+ * amdgpu_vm_lock_done_list - lock all BOs on the done list
+ * @vm: vm providing the BOs
+ * @exec: drm execution context
+ * @num_fences: number of extra fences to reserve
+ *
+ * Lock the BOs on the done list in the DRM execution context.
+ */
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct list_head *prev = &vm->done;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ int ret;
+
+ /* We can only trust prev->next while holding the lock */
+ spin_lock(&vm->status_lock);
+ while (!list_is_head(prev->next, &vm->done)) {
+ bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
+
+ bo = bo_va->base.bo;
+ if (bo) {
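+ /*
+ * Hold a reference so the BO cannot be freed while the
+ * status lock is dropped to lock it in the drm_exec context.
+ */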
+ amdgpu_bo_ref(bo);
+ spin_unlock(&vm->status_lock);
+
+ ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
+ amdgpu_bo_unref(&bo);
+ if (unlikely(ret))
+ return ret;
+
+ spin_lock(&vm->status_lock);
+ }
+ prev = prev->next;
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
+ *
+ * @adev: amdgpu device pointer
+ * @vm: vm providing the BOs
+ *
+ * Move all BOs to the end of LRU and remember their positions to put them
+ * together.
+ */
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ spin_lock(&adev->mman.bdev.lru_lock);
+ ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
+ spin_unlock(&adev->mman.bdev.lru_lock);
+}
+
+/* Create scheduler entities for page table updates */
+static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ int r;
+
+ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+ if (r)
+ goto error;
+
+ return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+ adev->vm_manager.vm_pte_scheds,
+ adev->vm_manager.vm_pte_num_scheds, NULL);
+
+error:
+ drm_sched_entity_destroy(&vm->immediate);
+ return r;
+}
+
+/* Destroy the entities for page table updates again */
+static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
+{
+ drm_sched_entity_destroy(&vm->immediate);
+ drm_sched_entity_destroy(&vm->delayed);
+}
+
+/**
+ * amdgpu_vm_generation - return the page table re-generation counter
+ * @adev: the amdgpu_device
+ * @vm: optional VM to check, might be NULL
+ *
+ * Returns a page table re-generation token to allow checking if submissions
+ * are still valid to use this VM. The VM parameter might be NULL in which case
+ * just the VRAM lost counter will be used.
+ */
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
+
+ if (!vm)
+ return result;
+
+ result += lower_32_bits(vm->generation);
+ /* Add one if the page tables will be re-generated on next CS */
+ if (drm_sched_entity_error(&vm->delayed))
+ ++result;
+
+ return result;
+}
+
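+/*
+ * As a worked example: with a VRAM lost counter of 2 and a VM generation of
+ * 5 the token is (2ULL << 32) + 5; if the delayed entity has seen an error
+ * the token becomes (2ULL << 32) + 6, so any token cached before the error
+ * no longer matches and the page tables are known to need re-generation.
+ */
+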
+/**
+ * amdgpu_vm_validate - validate evicted BOs tracked in the VM
+ *
+ * @adev: amdgpu device pointer
+ * @vm: vm providing the BOs
+ * @ticket: optional reservation ticket used to reserve the VM
+ * @validate: callback to do the validation
+ * @param: parameter for the validation callback
+ *
+ * Validate the page table BOs and per-VM BOs on command submission if
+ * necessary. If a ticket is given, also try to validate evicted user queue
+ * BOs. They must already be reserved with the given ticket.
+ *
+ * Returns:
+ * Validation result.
+ */
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket,
+ int (*validate)(void *p, struct amdgpu_bo *bo),
+ void *param)
+{
+ uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
+ struct amdgpu_vm_bo_base *bo_base;
+ struct amdgpu_bo *bo;
+ int r;
+
+ if (vm->generation != new_vm_generation) {
+ vm->generation = new_vm_generation;
+ amdgpu_vm_bo_reset_state_machine(vm);
+ amdgpu_vm_fini_entities(vm);
+ r = amdgpu_vm_init_entities(adev, vm);
+ if (r)
+ return r;
+ }
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->evicted)) {
+ bo_base = list_first_entry(&vm->evicted,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_base->bo;
+
+ r = validate(param, bo);
+ if (r)
+ return r;
+
+ if (bo->tbo.type != ttm_bo_type_kernel) {
+ amdgpu_vm_bo_moved(bo_base);
+ } else {
+ vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
+ amdgpu_vm_bo_relocated(bo_base);
+ }
+ spin_lock(&vm->status_lock);
+ }
+ while (ticket && !list_empty(&vm->evicted_user)) {
+ bo_base = list_first_entry(&vm->evicted_user,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_base->bo;
+ dma_resv_assert_held(bo->tbo.base.resv);
+
+ r = validate(param, bo);
+ if (r)
+ return r;
+
+ amdgpu_vm_bo_invalidated(bo_base);
+
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ amdgpu_vm_eviction_lock(vm);
+ vm->evicting = false;
+ amdgpu_vm_eviction_unlock(vm);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_ready - check VM is ready for updates
+ *
+ * @vm: VM to check
+ *
+ * Check if all VM PDs/PTs are ready for updates
+ *
+ * Returns:
+ * True if VM is not evicting and all VM entities are not stopped
+ */
+bool amdgpu_vm_ready(struct amdgpu_vm *vm)
+{
+ bool ret;
+
+ amdgpu_vm_assert_locked(vm);
+
+ amdgpu_vm_eviction_lock(vm);
+ ret = !vm->evicting;
+ amdgpu_vm_eviction_unlock(vm);
+
+ spin_lock(&vm->status_lock);
+ ret &= list_empty(&vm->evicted);
+ spin_unlock(&vm->status_lock);
+
+ spin_lock(&vm->immediate.lock);
+ ret &= !vm->immediate.stopped;
+ spin_unlock(&vm->immediate.lock);
+
+ spin_lock(&vm->delayed.lock);
+ ret &= !vm->delayed.stopped;
+ spin_unlock(&vm->delayed.lock);
+
+ return ret;
+}
+
+/**
+ * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
+ *
+ * @adev: amdgpu_device pointer
+ */
+void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
+{
+ const struct amdgpu_ip_block *ip_block;
+ bool has_compute_vm_bug;
+ struct amdgpu_ring *ring;
+ int i;
+
+ has_compute_vm_bug = false;
+
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (ip_block) {
+ /* Compute has a VM bug for GFX version < 7.
+ * Compute has a VM bug for GFX 8 MEC firmware version < 673.
+ */
+ if (ip_block->version->major <= 7)
+ has_compute_vm_bug = true;
+ else if (ip_block->version->major == 8)
+ if (adev->gfx.mec_fw_version < 673)
+ has_compute_vm_bug = true;
+ }
+
+ for (i = 0; i < adev->num_rings; i++) {
+ ring = adev->rings[i];
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ /* only compute rings */
+ ring->has_compute_vm_bug = has_compute_vm_bug;
+ else
+ ring->has_compute_vm_bug = false;
+ }
+}
+
+/**
+ * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
+ *
+ * @ring: ring on which the job will be submitted
+ * @job: job to submit
+ *
+ * Returns:
+ * True if sync is needed.
+ */
+bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+
+ if (job->vmid == 0)
+ return false;
+
+ if (job->vm_needs_flush || ring->has_compute_vm_bug)
+ return true;
+
+ if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+ return true;
+
+ if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
+ return true;
+
+ return false;
+}
+
+/**
+ * amdgpu_vm_flush - hardware flush the vm
+ *
+ * @ring: ring to use for flush
+ * @job: related job
+ * @need_pipe_sync: is pipe sync needed
+ *
+ * Emit a VM flush when it is necessary.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
+ bool need_pipe_sync)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ unsigned vmhub = ring->vm_hub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
+ bool spm_update_needed = job->spm_update_needed;
+ bool gds_switch_needed = ring->funcs->emit_gds_switch &&
+ job->gds_switch_needed;
+ bool vm_flush_needed = job->vm_needs_flush;
+ bool cleaner_shader_needed = false;
+ bool pasid_mapping_needed = false;
+ struct dma_fence *fence = NULL;
+ unsigned int patch;
+ int r;
+
+ if (amdgpu_vmid_had_gpu_reset(adev, id)) {
+ gds_switch_needed = true;
+ vm_flush_needed = true;
+ pasid_mapping_needed = true;
+ spm_update_needed = true;
+ }
+
+ mutex_lock(&id_mgr->lock);
+ if (id->pasid != job->pasid || !id->pasid_mapping ||
+ !dma_fence_is_signaled(id->pasid_mapping))
+ pasid_mapping_needed = true;
+ mutex_unlock(&id_mgr->lock);
+
+ gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+ vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
+ job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
+ pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
+ ring->funcs->emit_wreg;
+
+ cleaner_shader_needed = job->run_cleaner_shader &&
+ adev->gfx.enable_cleaner_shader &&
+ ring->funcs->emit_cleaner_shader && job->base.s_fence &&
+ &job->base.s_fence->scheduled == isolation->spearhead;
+
+ if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
+ !cleaner_shader_needed)
+ return 0;
+
+ amdgpu_ring_ib_begin(ring);
+ if (ring->funcs->init_cond_exec)
+ patch = amdgpu_ring_init_cond_exec(ring,
+ ring->cond_exe_gpu_addr);
+
+ if (need_pipe_sync)
+ amdgpu_ring_emit_pipeline_sync(ring);
+
+ if (cleaner_shader_needed)
+ ring->funcs->emit_cleaner_shader(ring);
+
+ if (vm_flush_needed) {
+ trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
+ amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
+ }
+
+ if (pasid_mapping_needed)
+ amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
+
+ if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
+
+ if (ring->funcs->emit_gds_switch &&
+ gds_switch_needed) {
+ amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+ job->gds_size, job->gws_base,
+ job->gws_size, job->oa_base,
+ job->oa_size);
+ }
+
+ if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
+ r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
+ if (r)
+ return r;
+ fence = &job->hw_vm_fence->base;
+ /* get a ref for the job */
+ dma_fence_get(fence);
+ }
+
+ if (vm_flush_needed) {
+ mutex_lock(&id_mgr->lock);
+ dma_fence_put(id->last_flush);
+ id->last_flush = dma_fence_get(fence);
+ id->current_gpu_reset_count =
+ atomic_read(&adev->gpu_reset_counter);
+ mutex_unlock(&id_mgr->lock);
+ }
+
+ if (pasid_mapping_needed) {
+ mutex_lock(&id_mgr->lock);
+ id->pasid = job->pasid;
+ dma_fence_put(id->pasid_mapping);
+ id->pasid_mapping = dma_fence_get(fence);
+ mutex_unlock(&id_mgr->lock);
+ }
+
+ /*
+ * Make sure that all other submissions wait for the cleaner shader to
+ * finish before we push them to the HW.
+ */
+ if (cleaner_shader_needed) {
+ trace_amdgpu_cleaner_shader(ring, fence);
+ mutex_lock(&adev->enforce_isolation_mutex);
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(fence);
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ }
+ dma_fence_put(fence);
+
+ amdgpu_ring_patch_cond_exec(ring, patch);
+
+ /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
+ if (ring->funcs->emit_switch_buffer) {
+ amdgpu_ring_emit_switch_buffer(ring);
+ amdgpu_ring_emit_switch_buffer(ring);
+ }
+
+ amdgpu_ring_ib_end(ring);
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
+ *
+ * @vm: requested vm
+ * @bo: requested buffer object
+ *
+ * Find @bo inside the requested vm.
+ * Search inside the @bo's vm list for the requested vm.
+ * Returns the found bo_va or NULL if none is found.
+ *
+ * Object has to be reserved!
+ *
+ * Returns:
+ * Found bo_va or NULL.
+ */
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *base;
+
+ for (base = bo->vm_bo; base; base = base->next) {
+ if (base->vm != vm)
+ continue;
+
+ return container_of(base, struct amdgpu_bo_va, base);
+ }
+ return NULL;
+}
+
+/**
+ * amdgpu_vm_map_gart - Resolve gart mapping of addr
+ *
+ * @pages_addr: optional DMA address to use for lookup
+ * @addr: the unmapped addr
+ *
+ * Look up the physical address of the page that the pte resolves
+ * to.
+ *
+ * Returns:
+ * The address value to put into the page table entry.
+ */
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
+{
+ uint64_t result;
+
+ /* page table offset */
+ result = pages_addr[addr >> PAGE_SHIFT];
+
+ /* in case cpu page size != gpu page size */
+ result |= addr & (~PAGE_MASK);
+
+ result &= 0xFFFFFFFFFFFFF000ULL;
+
+ return result;
+}
+
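+/*
+ * For example, with 64 KiB CPU pages and 4 KiB GPU pages, an @addr of
+ * 0x13000 selects pages_addr[1] (the DMA address of the backing CPU page)
+ * and adds the 0x3000 offset inside it, yielding the 4 KiB aligned DMA
+ * address that is written into the PTE.
+ */
+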
+/**
+ * amdgpu_vm_update_pdes - make sure that all directories are valid
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @immediate: submit immediately to the paging queue
+ *
+ * Makes sure all directories are up to date.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ */
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool immediate)
+{
+ struct amdgpu_vm_update_params params;
+ struct amdgpu_vm_bo_base *entry;
+ bool flush_tlb_needed = false;
+ LIST_HEAD(relocated);
+ int r, idx;
+
+ amdgpu_vm_assert_locked(vm);
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->relocated, &relocated);
+ spin_unlock(&vm->status_lock);
+
+ if (list_empty(&relocated))
+ return 0;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.vm = vm;
+ params.immediate = immediate;
+
+ r = vm->update_funcs->prepare(&params, NULL,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES);
+ if (r)
+ goto error;
+
+ list_for_each_entry(entry, &relocated, vm_status) {
+ /* vm_flush_needed after updating moved PDEs */
+ flush_tlb_needed |= entry->moved;
+
+ r = amdgpu_vm_pde_update(&params, entry);
+ if (r)
+ goto error;
+ }
+
+ r = vm->update_funcs->commit(&params, &vm->last_update);
+ if (r)
+ goto error;
+
+ if (flush_tlb_needed)
+ atomic64_inc(&vm->tlb_seq);
+
+ while (!list_empty(&relocated)) {
+ entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
+ vm_status);
+ amdgpu_vm_bo_idle(entry);
+ }
+
+error:
+ drm_dev_exit(idx);
+ return r;
+}
+
+/**
+ * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
+ * @fence: unused
+ * @cb: the callback structure
+ *
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
+ */
+static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
+ struct dma_fence_cb *cb)
+{
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb;
+
+ tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
+ atomic64_inc(&tlb_cb->vm->tlb_seq);
+ kfree(tlb_cb);
+}
+
+/**
+ * amdgpu_vm_tlb_flush - prepare TLB flush
+ *
+ * @params: parameters for update
+ * @fence: input fence to sync TLB flush with
+ * @tlb_cb: the callback structure
+ *
+ * Increments the tlb sequence once the update fence signals and, for locked
+ * updates, attaches a TLB flush fence to the page tables.
+ */
+static void
+amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
+ struct dma_fence **fence,
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb)
+{
+ struct amdgpu_vm *vm = params->vm;
+
+ tlb_cb->vm = vm;
+ if (!fence || !*fence) {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+ return;
+ }
+
+ if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
+ amdgpu_vm_tlb_seq_cb)) {
+ dma_fence_put(vm->last_tlb_flush);
+ vm->last_tlb_flush = dma_fence_get(*fence);
+ } else {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+ }
+
+ /* Prepare a TLB flush fence to be attached to PTs */
+ if (!params->unlocked) {
+ amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
+
+ /* Makes sure no PD/PT is freed before the flush */
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+}
+
+/**
+ * amdgpu_vm_update_range - update a range in the vm page table
+ *
+ * @adev: amdgpu_device pointer to use for commands
+ * @vm: the VM to update the range
+ * @immediate: immediate submission in a page fault
+ * @unlocked: unlocked invalidation during MM callback
+ * @flush_tlb: trigger tlb invalidation after update completed
+ * @allow_override: change MTYPE for local NUMA nodes
+ * @sync: fences we need to sync to
+ * @start: start of mapped range
+ * @last: last mapped entry
+ * @flags: flags for the entries
+ * @offset: offset into nodes and pages_addr
+ * @vram_base: base for vram mappings
+ * @res: ttm_resource to map
+ * @pages_addr: DMA addresses to use for mapping
+ * @fence: optional resulting fence
+ *
+ * Fill in the page table entries between @start and @last.
+ *
+ * Returns:
+ * 0 for success, negative error code for failure.
+ */
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence)
+{
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb;
+ struct amdgpu_vm_update_params params;
+ struct amdgpu_res_cursor cursor;
+ int r, idx;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
+ if (!tlb_cb) {
+ drm_dev_exit(idx);
+ return -ENOMEM;
+ }
+
+ /* On Vega20 with XGMI, PTEs get inadvertently cached in the L2 texture
+ * cache, so flush the TLB heavy-weight unconditionally.
+ */
+ flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0);
+
+ /*
+ * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
+ */
+ flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0);
+
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.vm = vm;
+ params.immediate = immediate;
+ params.pages_addr = pages_addr;
+ params.unlocked = unlocked;
+ params.needs_flush = flush_tlb;
+ params.allow_override = allow_override;
+ INIT_LIST_HEAD(&params.tlb_flush_waitlist);
+
+ amdgpu_vm_eviction_lock(vm);
+ if (vm->evicting) {
+ r = -EBUSY;
+ goto error_free;
+ }
+
+ if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
+ struct dma_fence *tmp = dma_fence_get_stub();
+
+ amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true);
+ swap(vm->last_unlocked, tmp);
+ dma_fence_put(tmp);
+ }
+
+ r = vm->update_funcs->prepare(&params, sync,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE);
+ if (r)
+ goto error_free;
+
+ amdgpu_res_first(pages_addr ? NULL : res, offset,
+ (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
+ while (cursor.remaining) {
+ uint64_t tmp, num_entries, addr;
+
+ num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
+ if (pages_addr) {
+ bool contiguous = true;
+
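+ /*
+ * System memory can be physically scattered. Work out how many
+ * of the backing CPU pages are contiguous so the range can be
+ * mapped as one linear block; otherwise fall back to the
+ * per-page lookup through params.pages_addr.
+ */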
+ if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+ uint64_t pfn = cursor.start >> PAGE_SHIFT;
+ uint64_t count;
+
+ contiguous = pages_addr[pfn + 1] ==
+ pages_addr[pfn] + PAGE_SIZE;
+
+ tmp = num_entries /
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ for (count = 2; count < tmp; ++count) {
+ uint64_t idx = pfn + count;
+
+ if (contiguous != (pages_addr[idx] ==
+ pages_addr[idx - 1] + PAGE_SIZE))
+ break;
+ }
+ if (!contiguous)
+ count--;
+ num_entries = count *
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ }
+
+ if (!contiguous) {
+ addr = cursor.start;
+ params.pages_addr = pages_addr;
+ } else {
+ addr = pages_addr[cursor.start >> PAGE_SHIFT];
+ params.pages_addr = NULL;
+ }
+
+ } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) {
+ addr = vram_base + cursor.start;
+ } else {
+ addr = 0;
+ }
+
+ tmp = start + num_entries;
+ r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
+ if (r)
+ goto error_free;
+
+ amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
+ start = tmp;
+ }
+
+ r = vm->update_funcs->commit(&params, fence);
+ if (r)
+ goto error_free;
+
+ if (params.needs_flush) {
+ amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
+ tlb_cb = NULL;
+ }
+
+ amdgpu_vm_pt_free_list(adev, &params);
+
+error_free:
+ kfree(tlb_cb);
+ amdgpu_vm_eviction_unlock(vm);
+ drm_dev_exit(idx);
+ return r;
+}
+
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
+{
+ spin_lock(&vm->status_lock);
+ memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_update - update all BO mappings in the vm page table
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: requested BO and VM object
+ * @clear: if true clear the entries
+ *
+ * Fill in the page table entries for @bo_va.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
+ */
+int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
+ bool clear)
+{
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_bo_va_mapping *mapping;
+ struct dma_fence **last_update;
+ dma_addr_t *pages_addr = NULL;
+ struct ttm_resource *mem;
+ struct amdgpu_sync sync;
+ bool flush_tlb = clear;
+ uint64_t vram_base;
+ uint64_t flags;
+ bool uncached;
+ int r;
+
+ amdgpu_sync_create(&sync);
+ if (clear) {
+ mem = NULL;
+
+ /* Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+ if (bo) {
+ r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
+ if (r)
+ goto error_free;
+ }
+ } else if (!bo) {
+ mem = NULL;
+
+ /* PRT map operations don't need to sync to anything. */
+
+ } else {
+ struct drm_gem_object *obj = &bo->tbo.base;
+
+ if (drm_gem_is_imported(obj) && bo_va->is_xgmi) {
+ struct dma_buf *dma_buf = obj->import_attach->dmabuf;
+ struct drm_gem_object *gobj = dma_buf->priv;
+ struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
+
+ if (abo->tbo.resource &&
+ abo->tbo.resource->mem_type == TTM_PL_VRAM)
+ bo = gem_to_amdgpu_bo(gobj);
+ }
+ mem = bo->tbo.resource;
+ if (mem && (mem->mem_type == TTM_PL_TT ||
+ mem->mem_type == AMDGPU_PL_PREEMPT))
+ pages_addr = bo->tbo.ttm->dma_address;
+
+ /* Implicitly sync to moving fences before mapping anything */
+ r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
+ AMDGPU_SYNC_EXPLICIT, vm);
+ if (r)
+ goto error_free;
+ }
+
+ if (bo) {
+ struct amdgpu_device *bo_adev;
+
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
+
+ if (amdgpu_bo_encrypted(bo))
+ flags |= AMDGPU_PTE_TMZ;
+
+ bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ vram_base = bo_adev->vm_manager.vram_base_offset;
+ uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
+ } else {
+ flags = 0x0;
+ vram_base = 0;
+ uncached = false;
+ }
+
+ if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
+ last_update = &vm->last_update;
+ else
+ last_update = &bo_va->last_pt_update;
+
+ if (!clear && bo_va->base.moved) {
+ flush_tlb = true;
+ list_splice_init(&bo_va->valids, &bo_va->invalids);
+
+ } else if (bo_va->cleared != clear) {
+ list_splice_init(&bo_va->valids, &bo_va->invalids);
+ }
+
+ list_for_each_entry(mapping, &bo_va->invalids, list) {
+ uint64_t update_flags = flags;
+
+ /* Normally bo_va->flags only contains the READABLE and WRITEABLE bits
+ * here, but we filter the flags in the first place just in case.
+ */
+ if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
+ update_flags &= ~AMDGPU_PTE_READABLE;
+ if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
+ update_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+ /* Apply ASIC specific mapping flags */
+ amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
+ &update_flags);
+
+ trace_amdgpu_vm_bo_update(mapping);
+
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
+ !uncached, &sync, mapping->start,
+ mapping->last, update_flags,
+ mapping->offset, vram_base, mem,
+ pages_addr, last_update);
+ if (r)
+ goto error_free;
+ }
+
+ /* If the BO is not in its preferred location add it back to
+ * the evicted list so that it gets validated again on the
+ * next command submission.
+ */
+ if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
+ if (bo->tbo.resource &&
+ !(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
+ amdgpu_vm_bo_evicted(&bo_va->base);
+ else
+ amdgpu_vm_bo_idle(&bo_va->base);
+ } else {
+ amdgpu_vm_bo_done(&bo_va->base);
+ }
+
+ list_splice_init(&bo_va->invalids, &bo_va->valids);
+ bo_va->cleared = clear;
+ bo_va->base.moved = false;
+
+ if (trace_amdgpu_vm_bo_mapping_enabled()) {
+ list_for_each_entry(mapping, &bo_va->valids, list)
+ trace_amdgpu_vm_bo_mapping(mapping);
+ }
+
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
+}
+
+/**
+ * amdgpu_vm_update_prt_state - update the global PRT state
+ *
+ * @adev: amdgpu_device pointer
+ */
+static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
+{
+ unsigned long flags;
+ bool enable;
+
+ spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
+ enable = !!atomic_read(&adev->vm_manager.num_prt_users);
+ adev->gmc.gmc_funcs->set_prt(adev, enable);
+ spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
+}
+
+/**
+ * amdgpu_vm_prt_get - add a PRT user
+ *
+ * @adev: amdgpu_device pointer
+ */
+static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.gmc_funcs->set_prt)
+ return;
+
+ if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
+ amdgpu_vm_update_prt_state(adev);
+}
+
+/**
+ * amdgpu_vm_prt_put - drop a PRT user
+ *
+ * @adev: amdgpu_device pointer
+ */
+static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
+{
+ if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
+ amdgpu_vm_update_prt_state(adev);
+}
+
+/**
+ * amdgpu_vm_prt_cb - callback for updating the PRT status
+ *
+ * @fence: fence for the callback
+ * @_cb: the callback function
+ */
+static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
+{
+ struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
+
+ amdgpu_vm_prt_put(cb->adev);
+ kfree(cb);
+}
+
+/**
+ * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
+ *
+ * @adev: amdgpu_device pointer
+ * @fence: fence for the callback
+ */
+static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
+ struct dma_fence *fence)
+{
+ struct amdgpu_prt_cb *cb;
+
+ if (!adev->gmc.gmc_funcs->set_prt)
+ return;
+
+ cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
+ if (!cb) {
+ /* Last resort when we are OOM */
+ if (fence)
+ dma_fence_wait(fence, false);
+
+ amdgpu_vm_prt_put(adev);
+ } else {
+ cb->adev = adev;
+ if (!fence || dma_fence_add_callback(fence, &cb->cb,
+ amdgpu_vm_prt_cb))
+ amdgpu_vm_prt_cb(fence, &cb->cb);
+ }
+}
+
+/**
+ * amdgpu_vm_free_mapping - free a mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @mapping: mapping to be freed
+ * @fence: fence of the unmap operation
+ *
+ * Free a mapping and make sure we decrease the PRT usage count if applicable.
+ */
+static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va_mapping *mapping,
+ struct dma_fence *fence)
+{
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
+ amdgpu_vm_add_prt_cb(adev, fence);
+ kfree(mapping);
+}
+
+/**
+ * amdgpu_vm_prt_fini - finish all prt mappings
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * Register a cleanup callback to disable PRT support after VM dies.
+ */
+static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct dma_resv *resv = vm->root.bo->tbo.base.resv;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ /* Add a callback for each fence in the reservation object */
+ amdgpu_vm_prt_get(adev);
+ amdgpu_vm_add_prt_cb(adev, fence);
+ }
+}
+
+/**
+ * amdgpu_vm_clear_freed - clear freed BOs in the PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @fence: optional resulting fence (unchanged if no work needed to be done
+ * or if an error occurred)
+ *
+ * Make sure all freed BOs are cleared in the PT.
+ * PTs have to be reserved and mutex must be locked!
+ *
+ * Returns:
+ * 0 for success.
+ *
+ */
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct dma_fence *f = NULL;
+ struct amdgpu_sync sync;
+ int r;
+
+ /*
+ * Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ amdgpu_sync_create(&sync);
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+
+ while (!list_empty(&vm->freed)) {
+ mapping = list_first_entry(&vm->freed,
+ struct amdgpu_bo_va_mapping, list);
+ list_del(&mapping->list);
+
+ r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
+ &sync, mapping->start, mapping->last,
+ 0, 0, 0, NULL, NULL, &f);
+ amdgpu_vm_free_mapping(adev, vm, mapping, f);
+ if (r) {
+ dma_fence_put(f);
+ goto error_free;
+ }
+ }
+
+ if (fence && f) {
+ dma_fence_put(*fence);
+ *fence = f;
+ } else {
+ dma_fence_put(f);
+ }
+
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
+
+}
+
+/**
+ * amdgpu_vm_handle_moved - handle moved BOs in the PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @ticket: optional reservation ticket used to reserve the VM
+ *
+ * Make sure all BOs which are moved are updated in the PTs.
+ *
+ * Returns:
+ * 0 for success.
+ *
+ * PTs have to be reserved!
+ */
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket)
+{
+ struct amdgpu_bo_va *bo_va;
+ struct dma_resv *resv;
+ bool clear, unlock;
+ int r;
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ /* Per VM BOs never need to be cleared in the page tables */
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+ spin_lock(&vm->status_lock);
+ }
+
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
+ base.vm_status);
+ resv = bo_va->base.bo->tbo.base.resv;
+ spin_unlock(&vm->status_lock);
+
+ /* Try to reserve the BO to avoid clearing its ptes */
+ if (!adev->debug_vm && dma_resv_trylock(resv)) {
+ clear = false;
+ unlock = true;
+ /* The caller is already holding the reservation lock */
+ } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
+ clear = false;
+ unlock = false;
+ /* Somebody else is using the BO right now */
+ } else {
+ clear = true;
+ unlock = false;
+ }
+
+ r = amdgpu_vm_bo_update(adev, bo_va, clear);
+
+ if (unlock)
+ dma_resv_unlock(resv);
+ if (r)
+ return r;
+
+ /* Remember evicted DMABuf imports in compute VMs for later
+ * validation
+ */
+ if (vm->is_compute_context &&
+ drm_gem_is_imported(&bo_va->base.bo->tbo.base) &&
+ (!bo_va->base.bo->tbo.resource ||
+ bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
+ amdgpu_vm_bo_evicted_user(&bo_va->base);
+
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask)
+{
+ uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+ bool all_hub = false;
+ int xcc = 0, r = 0;
+
+ WARN_ON_ONCE(!vm->is_compute_context);
+
+ /*
+ * It can be that we race and lose here, but that is extremely unlikely
+ * and the worst thing which could happen is that we flush the changes
+ * into the TLB once more which is harmless.
+ */
+ if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+ return 0;
+
+ if (adev->family == AMDGPU_FAMILY_AI ||
+ adev->family == AMDGPU_FAMILY_RV)
+ all_hub = true;
+
+ for_each_inst(xcc, xcc_mask) {
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+ all_hub, xcc);
+ if (r)
+ break;
+ }
+ return r;
+}
+
+/**
+ * amdgpu_vm_bo_add - add a bo to a specific vm
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @bo: amdgpu buffer object
+ *
+ * Add @bo into the requested vm.
+ * Add @bo to the list of BOs associated with the vm.
+ *
+ * Returns:
+ * Newly added bo_va or NULL for failure
+ *
+ * Object has to be reserved!
+ */
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_bo_va *bo_va;
+
+ bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
+ if (bo_va == NULL) {
+ return NULL;
+ }
+ amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
+
+ bo_va->ref_count = 1;
+ bo_va->last_pt_update = dma_fence_get_stub();
+ INIT_LIST_HEAD(&bo_va->valids);
+ INIT_LIST_HEAD(&bo_va->invalids);
+
+ if (!bo)
+ return bo_va;
+
+ dma_resv_assert_held(bo->tbo.base.resv);
+ if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
+ bo_va->is_xgmi = true;
+ /* Power up XGMI if it can be potentially used */
+ amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
+ }
+
+ return bo_va;
+}
+
+
+/**
+ * amdgpu_vm_bo_insert_map - insert a new mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @mapping: the mapping to insert
+ *
+ * Insert a new mapping into all structures.
+ */
+static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ struct amdgpu_bo_va_mapping *mapping)
+{
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+
+ mapping->bo_va = bo_va;
+ list_add(&mapping->list, &bo_va->invalids);
+ amdgpu_vm_it_insert(mapping, &vm->va);
+
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT)
+ amdgpu_vm_prt_get(adev);
+
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
+ amdgpu_vm_bo_moved(&bo_va->base);
+
+ trace_amdgpu_vm_bo_map(bo_va, mapping);
+}
+
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ uint64_t saddr,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t tmp, lpfn;
+
+ if (saddr & AMDGPU_GPU_PAGE_MASK
+ || offset & AMDGPU_GPU_PAGE_MASK
+ || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
+
+ if (check_add_overflow(saddr, size, &tmp)
+ || check_add_overflow(offset, size, &tmp)
+ || size == 0 /* which also leads to end < begin */)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ if (bo && offset + size > amdgpu_bo_size(bo))
+ return -EINVAL;
+
+ /* Ensure last pfn not exceed max_pfn */
+ lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+ if (lpfn >= adev->vm_manager.max_pfn)
+ return -EINVAL;
+
+ return 0;
+}
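+
+/*
+ * Illustration (assuming the usual 4 KiB GPU page size): mapping 1 MiB at
+ * VA 0x100000 with offset 0 passes all of the checks above, while an
+ * unaligned start address such as 0x100801, a zero size, or an offset +
+ * size that exceeds amdgpu_bo_size(bo) is rejected with -EINVAL before any
+ * page table state is touched.
+ */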
+
+/**
+ * amdgpu_vm_bo_map - map bo inside a vm
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @saddr: where to map the BO
+ * @offset: requested offset in the BO
+ * @size: BO size in bytes
+ * @flags: attributes of pages (read/write/valid/etc.)
+ *
+ * Add a mapping of the BO at the specified addr into the VM.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ *
+ * Object has to be reserved and unreserved outside!
+ */
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t saddr, uint64_t offset,
+ uint64_t size, uint32_t flags)
+{
+ struct amdgpu_bo_va_mapping *mapping, *tmp;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
+
+ tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
+ if (tmp) {
+ /* bo and tmp overlap, invalid addr */
+ dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
+ "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
+ tmp->start, tmp->last + 1);
+ return -EINVAL;
+ }
+
+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+ if (!mapping)
+ return -ENOMEM;
+
+ mapping->start = saddr;
+ mapping->last = eaddr;
+ mapping->offset = offset;
+ mapping->flags = flags;
+
+ amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
+
+ return 0;
+}
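+
+/*
+ * Rough usage sketch (simplified, error handling omitted; the BO and the VM
+ * page tables must be reserved by the caller as noted above, and va_start
+ * stands for a caller-chosen, page-aligned virtual address):
+ *
+ *   bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+ *   r = amdgpu_vm_bo_map(adev, bo_va, va_start, 0, amdgpu_bo_size(bo),
+ *                        AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE);
+ *   r = amdgpu_vm_bo_update(adev, bo_va, false);
+ *
+ * The mapping only becomes visible to the GPU once amdgpu_vm_bo_update()
+ * has actually written the page table entries.
+ */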
+
+/**
+ * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @saddr: where to map the BO
+ * @offset: requested offset in the BO
+ * @size: BO size in bytes
+ * @flags: attributes of pages (read/write/valid/etc.)
+ *
+ * Add a mapping of the BO at the specified addr into the VM. Replace existing
+ * mappings as we do so.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ *
+ * Object has to be reserved and unreserved outside!
+ */
+int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t saddr, uint64_t offset,
+ uint64_t size, uint32_t flags)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
+
+ /* Allocate all the needed memory */
+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+ if (!mapping)
+ return -ENOMEM;
+
+ r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
+ if (r) {
+ kfree(mapping);
+ return r;
+ }
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
+
+ mapping->start = saddr;
+ mapping->last = eaddr;
+ mapping->offset = offset;
+ mapping->flags = flags;
+
+ amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_unmap - remove bo mapping from vm
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to remove the address from
+ * @saddr: where the BO is mapped
+ *
+ * Remove a mapping of the BO at the specified addr from the VM.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ *
+ * Object has to be reserved and unreserved outside!
+ */
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t saddr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ bool valid = true;
+ int r;
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+
+ list_for_each_entry(mapping, &bo_va->valids, list) {
+ if (mapping->start == saddr)
+ break;
+ }
+
+ if (&mapping->list == &bo_va->valids) {
+ valid = false;
+
+ list_for_each_entry(mapping, &bo_va->invalids, list) {
+ if (mapping->start == saddr)
+ break;
+ }
+
+ if (&mapping->list == &bo_va->invalids)
+ return -ENOENT;
+ }
+
+ /* It is unlikely that the user queue mapping has not been idled by
+ * the time user space issues the GEM unmap IOCTL, unless the unmap
+ * is being forced from user space.
+ */
+ if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) {
+ r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
+ if (unlikely(r == -EBUSY))
+ dev_warn_once(adev->dev,
+ "Attempt to unmap an active userq buffer\n");
+ }
+
+ list_del(&mapping->list);
+ amdgpu_vm_it_remove(mapping, &vm->va);
+ mapping->bo_va = NULL;
+ trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+
+ if (valid)
+ list_add(&mapping->list, &vm->freed);
+ else
+ amdgpu_vm_free_mapping(adev, vm, mapping,
+ bo_va->last_pt_update);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM structure to use
+ * @saddr: start of the range
+ * @size: size of the range
+ *
+ * Remove all mappings in a range, split them as appropriate.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ */
+int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t saddr, uint64_t size)
+{
+ struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
+ LIST_HEAD(removed);
+ uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+ if (r)
+ return r;
+
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
+
+ /* Allocate all the needed memory */
+ before = kzalloc(sizeof(*before), GFP_KERNEL);
+ if (!before)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&before->list);
+
+ after = kzalloc(sizeof(*after), GFP_KERNEL);
+ if (!after) {
+ kfree(before);
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&after->list);
+
+ /* Now gather all removed mappings */
+ tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
+ while (tmp) {
+ /* Remember mapping split at the start */
+ if (tmp->start < saddr) {
+ before->start = tmp->start;
+ before->last = saddr - 1;
+ before->offset = tmp->offset;
+ before->flags = tmp->flags;
+ before->bo_va = tmp->bo_va;
+ list_add(&before->list, &tmp->bo_va->invalids);
+ }
+
+ /* Remember mapping split at the end */
+ if (tmp->last > eaddr) {
+ after->start = eaddr + 1;
+ after->last = tmp->last;
+ after->offset = tmp->offset;
+ after->offset += (after->start - tmp->start) << PAGE_SHIFT;
+ after->flags = tmp->flags;
+ after->bo_va = tmp->bo_va;
+ list_add(&after->list, &tmp->bo_va->invalids);
+ }
+
+ list_del(&tmp->list);
+ list_add(&tmp->list, &removed);
+
+ tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
+ }
+
+ /* And free them up */
+ list_for_each_entry_safe(tmp, next, &removed, list) {
+ amdgpu_vm_it_remove(tmp, &vm->va);
+ list_del(&tmp->list);
+
+ if (tmp->start < saddr)
+ tmp->start = saddr;
+ if (tmp->last > eaddr)
+ tmp->last = eaddr;
+
+ tmp->bo_va = NULL;
+ list_add(&tmp->list, &vm->freed);
+ trace_amdgpu_vm_bo_unmap(NULL, tmp);
+ }
+
+ /* Insert partial mapping before the range */
+ if (!list_empty(&before->list)) {
+ struct amdgpu_bo *bo = before->bo_va->base.bo;
+
+ amdgpu_vm_it_insert(before, &vm->va);
+ if (before->flags & AMDGPU_VM_PAGE_PRT)
+ amdgpu_vm_prt_get(adev);
+
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
+ !before->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&before->bo_va->base);
+ } else {
+ kfree(before);
+ }
+
+ /* Insert partial mapping after the range */
+ if (!list_empty(&after->list)) {
+ struct amdgpu_bo *bo = after->bo_va->base.bo;
+
+ amdgpu_vm_it_insert(after, &vm->va);
+ if (after->flags & AMDGPU_VM_PAGE_PRT)
+ amdgpu_vm_prt_get(adev);
+
+ if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
+ !after->bo_va->base.moved)
+ amdgpu_vm_bo_moved(&after->bo_va->base);
+ } else {
+ kfree(after);
+ }
+
+ return 0;
+}
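+
+/*
+ * Split illustration: clearing pages [0x180, 0x1bf] out of an existing
+ * mapping covering pages [0x100, 0x1ff] detaches the whole mapping, queues
+ * the clamped middle part [0x180, 0x1bf] on vm->freed and re-inserts the
+ * remainders as two new mappings, "before" = [0x100, 0x17f] and
+ * "after" = [0x1c0, 0x1ff], with the offset of "after" advanced to match.
+ */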
+
+/**
+ * amdgpu_vm_bo_lookup_mapping - find mapping by address
+ *
+ * @vm: the requested VM
+ * @addr: the address
+ *
+ * Find a mapping by its address.
+ *
+ * Returns:
+ * The amdgpu_bo_va_mapping matching for addr or NULL
+ *
+ */
+struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
+ uint64_t addr)
+{
+ return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
+}
+
+/**
+ * amdgpu_vm_bo_trace_cs - trace all reserved mappings
+ *
+ * @vm: the requested vm
+ * @ticket: CS ticket
+ *
+ * Trace all mappings of BOs reserved during a command submission.
+ */
+void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+
+ if (!trace_amdgpu_vm_bo_cs_enabled())
+ return;
+
+ for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
+ mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
+ if (mapping->bo_va && mapping->bo_va->base.bo) {
+ struct amdgpu_bo *bo;
+
+ bo = mapping->bo_va->base.bo;
+ if (dma_resv_locking_ctx(bo->tbo.base.resv) !=
+ ticket)
+ continue;
+ }
+
+ trace_amdgpu_vm_bo_cs(mapping);
+ }
+}
+
+/**
+ * amdgpu_vm_bo_del - remove a bo from a specific vm
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: requested bo_va
+ *
+ * Remove @bo_va->bo from the requested vm.
+ *
+ * Object has to be reserved!
+ */
+void amdgpu_vm_bo_del(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va)
+{
+ struct amdgpu_bo_va_mapping *mapping, *next;
+ struct amdgpu_bo *bo = bo_va->base.bo;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_vm_bo_base **base;
+
+ dma_resv_assert_held(vm->root.bo->tbo.base.resv);
+
+ if (bo) {
+ dma_resv_assert_held(bo->tbo.base.resv);
+ if (amdgpu_vm_is_bo_always_valid(vm, bo))
+ ttm_bo_set_bulk_move(&bo->tbo, NULL);
+
+ for (base = &bo_va->base.bo->vm_bo; *base;
+ base = &(*base)->next) {
+ if (*base != &bo_va->base)
+ continue;
+
+ amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
+ *base = bo_va->base.next;
+ break;
+ }
+ }
+
+ spin_lock(&vm->status_lock);
+ list_del(&bo_va->base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
+ list_del(&mapping->list);
+ amdgpu_vm_it_remove(mapping, &vm->va);
+ mapping->bo_va = NULL;
+ trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+ list_add(&mapping->list, &vm->freed);
+ }
+ list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
+ list_del(&mapping->list);
+ amdgpu_vm_it_remove(mapping, &vm->va);
+ amdgpu_vm_free_mapping(adev, vm, mapping,
+ bo_va->last_pt_update);
+ }
+
+ dma_fence_put(bo_va->last_pt_update);
+
+ if (bo && bo_va->is_xgmi)
+ amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
+
+ kfree(bo_va);
+}
+
+/**
+ * amdgpu_vm_evictable - check if we can evict a VM
+ *
+ * @bo: A page table of the VM.
+ *
+ * Check if it is possible to evict a VM.
+ */
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
+
+ /* Page tables of a destroyed VM can go away immediately */
+ if (!bo_base || !bo_base->vm)
+ return true;
+
+ /* Don't evict VM page tables while they are busy */
+ if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
+ return false;
+
+ /* Try to block ongoing updates */
+ if (!amdgpu_vm_eviction_trylock(bo_base->vm))
+ return false;
+
+ /* Don't evict VM page tables while they are updated */
+ if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
+ amdgpu_vm_eviction_unlock(bo_base->vm);
+ return false;
+ }
+
+ bo_base->vm->evicting = true;
+ amdgpu_vm_eviction_unlock(bo_base->vm);
+ return true;
+}
+
+/**
+ * amdgpu_vm_bo_invalidate - mark the bo as invalid
+ *
+ * @bo: amdgpu buffer object
+ * @evicted: is the BO evicted
+ *
+ * Mark @bo as invalid.
+ */
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+
+ if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) {
+ amdgpu_vm_bo_evicted(bo_base);
+ continue;
+ }
+
+ if (bo_base->moved)
+ continue;
+ bo_base->moved = true;
+
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ amdgpu_vm_bo_relocated(bo_base);
+ else if (amdgpu_vm_is_bo_always_valid(vm, bo))
+ amdgpu_vm_bo_moved(bo_base);
+ else
+ amdgpu_vm_bo_invalidated(bo_base);
+ }
+}
+
+/**
+ * amdgpu_vm_bo_move - handle BO move
+ *
+ * @bo: amdgpu buffer object
+ * @new_mem: the new placement of the BO move
+ * @evicted: is the BO evicted
+ *
+ * Update the memory stats for the new placement and mark @bo as invalid.
+ */
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
+ amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
+ spin_unlock(&vm->status_lock);
+ }
+
+ amdgpu_vm_bo_invalidate(bo, evicted);
+}
+
+/**
+ * amdgpu_vm_get_block_size - calculate VM page table size as power of two
+ *
+ * @vm_size: VM size
+ *
+ * Returns:
+ * VM page table as power of two
+ */
+static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
+{
+ /* Total bits covered by PD + PTs */
+ unsigned bits = ilog2(vm_size) + 18;
+
+ /* Make sure the PD is 4K in size for up to 8GB of address space.
+ * Above that, split the bits equally between PD and PTs.
+ */
+ if (vm_size <= 8)
+ return (bits - 9);
+ else
+ return ((bits + 3) / 2);
+}
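+
+/*
+ * Worked example: for a 256 GB address space, bits = ilog2(256) + 18 = 26,
+ * giving a block size of (26 + 3) / 2 = 14 bits per page table and leaving
+ * 12 bits (4096 entries) for the page directory. At 8 GB and below the PD
+ * keeps exactly 512 entries, i.e. a single 4K page.
+ */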
+
+/**
+ * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
+ *
+ * @adev: amdgpu_device pointer
+ * @min_vm_size: the minimum vm size in GB if it's set to auto
+ * @fragment_size_default: Default PTE fragment size
+ * @max_level: max VMPT level
+ * @max_bits: max address space size in bits
+ *
+ */
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
+ uint32_t fragment_size_default, unsigned max_level,
+ unsigned max_bits)
+{
+ unsigned int max_size = 1 << (max_bits - 30);
+ unsigned int vm_size;
+ uint64_t tmp;
+
+ /* adjust vm size first */
+ if (amdgpu_vm_size != -1) {
+ vm_size = amdgpu_vm_size;
+ if (vm_size > max_size) {
+ dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
+ amdgpu_vm_size, max_size);
+ vm_size = max_size;
+ }
+ } else {
+ struct sysinfo si;
+ unsigned int phys_ram_gb;
+
+ /* Optimal VM size depends on the amount of physical
+ * RAM available. Underlying requirements and
+ * assumptions:
+ *
+ * - Need to map system memory and VRAM from all GPUs
+ * - VRAM from other GPUs not known here
+ * - Assume VRAM <= system memory
+ * - On GFX8 and older, VM space can be segmented for
+ * different MTYPEs
+ * - Need to allow room for fragmentation, guard pages etc.
+ *
+ * This adds up to a rough guess of system memory x3.
+ * Round up to power of two to maximize the available
+ * VM size with the given page table size.
+ */
+ si_meminfo(&si);
+ phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
+ (1 << 30) - 1) >> 30;
+ vm_size = roundup_pow_of_two(
+ clamp(phys_ram_gb * 3, min_vm_size, max_size));
+ }
+
+ adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
+
+ tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
+ if (amdgpu_vm_block_size != -1)
+ tmp >>= amdgpu_vm_block_size - 9;
+ tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
+ adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
+ switch (adev->vm_manager.num_level) {
+ case 3:
+ adev->vm_manager.root_level = AMDGPU_VM_PDB2;
+ break;
+ case 2:
+ adev->vm_manager.root_level = AMDGPU_VM_PDB1;
+ break;
+ case 1:
+ adev->vm_manager.root_level = AMDGPU_VM_PDB0;
+ break;
+ default:
+ dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
+ }
+ /* block size depends on vm size and hw setup */
+ if (amdgpu_vm_block_size != -1)
+ adev->vm_manager.block_size =
+ min((unsigned)amdgpu_vm_block_size, max_bits
+ - AMDGPU_GPU_PAGE_SHIFT
+ - 9 * adev->vm_manager.num_level);
+ else if (adev->vm_manager.num_level > 1)
+ adev->vm_manager.block_size = 9;
+ else
+ adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
+
+ if (amdgpu_vm_fragment_size == -1)
+ adev->vm_manager.fragment_size = fragment_size_default;
+ else
+ adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
+
+ dev_info(
+ adev->dev,
+ "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
+ vm_size, adev->vm_manager.num_level + 1,
+ adev->vm_manager.block_size, adev->vm_manager.fragment_size);
+}
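+
+/*
+ * Sizing example: on a machine with roughly 16 GB of RAM and
+ * amdgpu_vm_size == -1, the heuristic above picks 16 * 3 = 48 GB, clamps it
+ * to [min_vm_size, max_size] and rounds up to the next power of two, ending
+ * up with a 64 GB VM and max_pfn = 64 << 18 (in 4 KiB GPU pages).
+ */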
+
+/**
+ * amdgpu_vm_wait_idle - wait for the VM to become idle
+ *
+ * @vm: VM object to wait for
+ * @timeout: timeout to wait for VM to become idle
+ */
+long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
+{
+ timeout = drm_sched_entity_flush(&vm->immediate, timeout);
+ if (timeout <= 0)
+ return timeout;
+
+ return drm_sched_entity_flush(&vm->delayed, timeout);
+}
+
+static void amdgpu_vm_destroy_task_info(struct kref *kref)
+{
+ struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount);
+
+ kfree(ti);
+}
+
+static inline struct amdgpu_vm *
+amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+
+ return vm;
+}
+
+/**
+ * amdgpu_vm_put_task_info - reference down the vm task_info ptr
+ *
+ * @task_info: task_info struct under discussion.
+ *
+ * frees the vm task_info ptr at the last put
+ */
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
+{
+ if (task_info)
+ kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
+}
+
+/**
+ * amdgpu_vm_get_task_info_vm - Extracts task info for a vm.
+ *
+ * @vm: VM to get info from
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
+{
+ struct amdgpu_task_info *ti = NULL;
+
+ if (vm) {
+ ti = vm->task_info;
+ kref_get(&vm->task_info->refcount);
+ }
+
+ return ti;
+}
+
+/**
+ * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID.
+ *
+ * @adev: drm device pointer
+ * @pasid: PASID identifier for VM
+ *
+ * Returns the reference counted task_info structure, which must be
+ * referenced down with amdgpu_vm_put_task_info.
+ */
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
+{
+ return amdgpu_vm_get_task_info_vm(
+ amdgpu_vm_get_vm_from_pasid(adev, pasid));
+}
+
+static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
+{
+ vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL);
+ if (!vm->task_info)
+ return -ENOMEM;
+
+ kref_init(&vm->task_info->refcount);
+ return 0;
+}
+
+/**
+ * amdgpu_vm_set_task_info - Sets VMs task info.
+ *
+ * @vm: vm for which to set the info
+ */
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+{
+ if (!vm->task_info)
+ return;
+
+ if (vm->task_info->task.pid == current->pid)
+ return;
+
+ vm->task_info->task.pid = current->pid;
+ get_task_comm(vm->task_info->task.comm, current);
+
+ if (current->group_leader->mm != current->mm)
+ return;
+
+ vm->task_info->tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info->process_name, current->group_leader);
+}
+
+/**
+ * amdgpu_vm_init - initialize a vm instance
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @xcp_id: GPU partition selection id
+ * @pasid: the pasid the VM is using on this GPU
+ *
+ * Init @vm fields.
+ *
+ * Returns:
+ * 0 for success, error for failure.
+ */
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int32_t xcp_id, uint32_t pasid)
+{
+ struct amdgpu_bo *root_bo;
+ struct amdgpu_bo_vm *root;
+ int r, i;
+
+ vm->va = RB_ROOT_CACHED;
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
+ vm->reserved_vmid[i] = NULL;
+ INIT_LIST_HEAD(&vm->evicted);
+ INIT_LIST_HEAD(&vm->evicted_user);
+ INIT_LIST_HEAD(&vm->relocated);
+ INIT_LIST_HEAD(&vm->moved);
+ INIT_LIST_HEAD(&vm->idle);
+ INIT_LIST_HEAD(&vm->invalidated);
+ spin_lock_init(&vm->status_lock);
+ INIT_LIST_HEAD(&vm->freed);
+ INIT_LIST_HEAD(&vm->done);
+ INIT_KFIFO(vm->faults);
+
+ r = amdgpu_vm_init_entities(adev, vm);
+ if (r)
+ return r;
+
+ ttm_lru_bulk_move_init(&vm->lru_bulk_move);
+
+ vm->is_compute_context = false;
+
+ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+ AMDGPU_VM_USE_CPU_FOR_GFX);
+
+ dev_dbg(adev->dev, "VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
+ WARN_ONCE((vm->use_cpu_for_update &&
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ "CPU update of VM recommended only for large BAR system\n");
+
+ if (vm->use_cpu_for_update)
+ vm->update_funcs = &amdgpu_vm_cpu_funcs;
+ else
+ vm->update_funcs = &amdgpu_vm_sdma_funcs;
+
+ vm->last_update = dma_fence_get_stub();
+ vm->last_unlocked = dma_fence_get_stub();
+ vm->last_tlb_flush = dma_fence_get_stub();
+ vm->generation = amdgpu_vm_generation(adev, NULL);
+
+ mutex_init(&vm->eviction_lock);
+ vm->evicting = false;
+ vm->tlb_fence_context = dma_fence_context_alloc(1);
+
+ r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
+ false, &root, xcp_id);
+ if (r)
+ goto error_free_delayed;
+
+ root_bo = amdgpu_bo_ref(&root->bo);
+ r = amdgpu_bo_reserve(root_bo, true);
+ if (r) {
+ amdgpu_bo_unref(&root_bo);
+ goto error_free_delayed;
+ }
+
+ amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
+ r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
+ if (r)
+ goto error_free_root;
+
+ r = amdgpu_vm_pt_clear(adev, vm, root, false);
+ if (r)
+ goto error_free_root;
+
+ r = amdgpu_vm_create_task_info(vm);
+ if (r)
+ dev_dbg(adev->dev, "Failed to create task info for VM\n");
+
+ /* Store new PASID in XArray (if non-zero) */
+ if (pasid != 0) {
+ r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
+ if (r < 0)
+ goto error_free_root;
+
+ vm->pasid = pasid;
+ }
+
+ amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_bo_unref(&root_bo);
+
+ return 0;
+
+error_free_root:
+ /* If PASID was partially set, erase it from XArray before failing */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
+ amdgpu_vm_pt_free_root(adev, vm);
+ amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_bo_unref(&root_bo);
+
+error_free_delayed:
+ dma_fence_put(vm->last_tlb_flush);
+ dma_fence_put(vm->last_unlocked);
+ ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+ amdgpu_vm_fini_entities(vm);
+
+ return r;
+}
+
+/**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_supports_ats
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting.
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ int r;
+
+ r = amdgpu_bo_reserve(vm->root.bo, true);
+ if (r)
+ return r;
+
+ /* Update VM state */
+ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+ AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+ dev_dbg(adev->dev, "VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
+ WARN_ONCE((vm->use_cpu_for_update &&
+ !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ "CPU update of VM recommended only for large BAR system\n");
+
+ if (vm->use_cpu_for_update) {
+ /* Sync with last SDMA update/clear before switching to CPU */
+ r = amdgpu_bo_sync_wait(vm->root.bo,
+ AMDGPU_FENCE_OWNER_UNDEFINED, true);
+ if (r)
+ goto unreserve_bo;
+
+ vm->update_funcs = &amdgpu_vm_cpu_funcs;
+ r = amdgpu_vm_pt_map_tables(adev, vm);
+ if (r)
+ goto unreserve_bo;
+
+ } else {
+ vm->update_funcs = &amdgpu_vm_sdma_funcs;
+ }
+
+ dma_fence_put(vm->last_update);
+ vm->last_update = dma_fence_get_stub();
+ vm->is_compute_context = true;
+
+unreserve_bo:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return r;
+}
+
+static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
+{
+ for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
+ if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
+ vm->stats[i].evicted == 0))
+ return false;
+ }
+ return true;
+}
+
+/**
+ * amdgpu_vm_fini - tear down a vm instance
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
+ * Tear down @vm.
+ * Unbind the VM and remove all bos from the vm bo list
+ */
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_bo_va_mapping *mapping, *tmp;
+ bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
+ struct amdgpu_bo *root;
+ unsigned long flags;
+ int i;
+
+ amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
+ root = amdgpu_bo_ref(vm->root.bo);
+ amdgpu_bo_reserve(root, true);
+ /* Remove PASID mapping before destroying VM */
+ if (vm->pasid != 0) {
+ xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
+ vm->pasid = 0;
+ }
+ dma_fence_wait(vm->last_unlocked, false);
+ dma_fence_put(vm->last_unlocked);
+ dma_fence_wait(vm->last_tlb_flush, false);
+ /* Make sure that all fence callbacks have completed */
+ spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
+ spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
+ dma_fence_put(vm->last_tlb_flush);
+
+ list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
+ if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
+ amdgpu_vm_prt_fini(adev, vm);
+ prt_fini_needed = false;
+ }
+
+ list_del(&mapping->list);
+ amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
+ }
+
+ amdgpu_vm_pt_free_root(adev, vm);
+ amdgpu_bo_unreserve(root);
+ amdgpu_bo_unref(&root);
+ WARN_ON(vm->root.bo);
+
+ amdgpu_vm_fini_entities(vm);
+
+ if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
+ dev_err(adev->dev, "still active bo inside vm\n");
+ }
+ rbtree_postorder_for_each_entry_safe(mapping, tmp,
+ &vm->va.rb_root, rb) {
+ /* Don't remove the mapping here, we don't want to trigger a
+ * rebalance and the tree is about to be destroyed anyway.
+ */
+ list_del(&mapping->list);
+ kfree(mapping);
+ }
+
+ dma_fence_put(vm->last_update);
+
+ for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+ amdgpu_vmid_free_reserved(adev, vm, i);
+ }
+
+ ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+
+ if (!amdgpu_vm_stats_is_zero(vm)) {
+ struct amdgpu_task_info *ti = vm->task_info;
+
+ dev_warn(adev->dev,
+ "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
+ ti->process_name, ti->task.pid, ti->task.comm, ti->tgid);
+ }
+
+ amdgpu_vm_put_task_info(vm->task_info);
+}
+
+/**
+ * amdgpu_vm_manager_init - init the VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the VM manager structures
+ */
+void amdgpu_vm_manager_init(struct amdgpu_device *adev)
+{
+ /* Concurrent flushes are only possible starting with Vega10 and
+ * are broken on Navi10 and Navi14.
+ */
+ adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
+ adev->asic_type == CHIP_NAVI10 ||
+ adev->asic_type == CHIP_NAVI14);
+ amdgpu_vmid_mgr_init(adev);
+
+ spin_lock_init(&adev->vm_manager.prt_lock);
+ atomic_set(&adev->vm_manager.num_prt_users, 0);
+
+ /* If not overridden by the user, compute VM tables are by default
+ * updated by the CPU only on large BAR systems.
+ */
+#ifdef CONFIG_X86_64
+ if (amdgpu_vm_update_mode == -1) {
+ /* For asic with VF MMIO access protection
+ * avoid using CPU for VM table updates
+ */
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ !amdgpu_sriov_vf_mmio_access_protection(adev))
+ adev->vm_manager.vm_update_mode =
+ AMDGPU_VM_USE_CPU_FOR_COMPUTE;
+ else
+ adev->vm_manager.vm_update_mode = 0;
+ } else
+ adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
+#else
+ adev->vm_manager.vm_update_mode = 0;
+#endif
+
+ xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
+}
+
+/**
+ * amdgpu_vm_manager_fini - cleanup VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cleanup the VM manager and free resources.
+ */
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
+{
+ WARN_ON(!xa_empty(&adev->vm_manager.pasids));
+ xa_destroy(&adev->vm_manager.pasids);
+
+ amdgpu_vmid_mgr_fini(adev);
+}
+
+/**
+ * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_vm
+ * @filp: drm file pointer
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ union drm_amdgpu_vm *args = data;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+
+ /* No valid flags defined yet */
+ if (args->in.flags)
+ return -EINVAL;
+
+ switch (args->in.op) {
+ case AMDGPU_VM_OP_RESERVE_VMID:
+ /* We only need to reserve a vmid from the gfxhub */
+ return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
+ case AMDGPU_VM_OP_UNRESERVE_VMID:
+ amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_handle_fault - graceful handling of VM faults.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @ts: Timestamp of the fault
+ * @vmid: VMID, only used for GFX 9.4.3.
+ * @node_id: Node_id received in IH cookie. Only applicable for
+ * GFX 9.4.3.
+ * @addr: Address of the fault
+ * @write_fault: true is write fault, false is read fault
+ *
+ * Try to gracefully handle a VM fault. Return true if the fault was handled and
+ * shouldn't be reported any more.
+ */
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
+ u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
+ bool write_fault)
+{
+ bool is_compute_context = false;
+ struct amdgpu_bo *root;
+ unsigned long irqflags;
+ uint64_t value, flags;
+ struct amdgpu_vm *vm;
+ int r;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm) {
+ root = amdgpu_bo_ref(vm->root.bo);
+ is_compute_context = vm->is_compute_context;
+ } else {
+ root = NULL;
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+
+ if (!root)
+ return false;
+
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+
+ if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
+ node_id, addr, ts, write_fault)) {
+ amdgpu_bo_unref(&root);
+ return true;
+ }
+
+ r = amdgpu_bo_reserve(root, true);
+ if (r)
+ goto error_unref;
+
+ /* Double check that the VM still exists */
+ xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ if (vm && vm->root.bo != root)
+ vm = NULL;
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+ if (!vm)
+ goto error_unlock;
+
+ flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
+ AMDGPU_PTE_SYSTEM;
+
+ if (is_compute_context) {
+ /* Intentionally setting invalid PTE flag
+ * combination to force a no-retry-fault
+ */
+ flags = AMDGPU_VM_NORETRY_FLAGS;
+ value = 0;
+ } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
+ /* Redirect the access to the dummy page */
+ value = adev->dummy_page_addr;
+ flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
+ AMDGPU_PTE_WRITEABLE;
+
+ } else {
+ /* Let the hw retry silently on the PTE */
+ value = 0;
+ }
+
+ r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
+ if (r) {
+ pr_debug("failed %d to reserve fence slot\n", r);
+ goto error_unlock;
+ }
+
+ r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
+ NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
+ if (r)
+ goto error_unlock;
+
+ r = amdgpu_vm_update_pdes(adev, vm, true);
+
+error_unlock:
+ amdgpu_bo_unreserve(root);
+ if (r < 0)
+ dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
+
+error_unref:
+ amdgpu_bo_unref(&root);
+
+ return false;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+/**
+ * amdgpu_debugfs_vm_bo_info - print BO info for the VM
+ *
+ * @vm: Requested VM for printing BO info
+ * @m: debugfs file
+ *
+ * Print BO information in debugfs file for the VM
+ */
+void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
+{
+ struct amdgpu_bo_va *bo_va, *tmp;
+ u64 total_idle = 0;
+ u64 total_evicted = 0;
+ u64 total_relocated = 0;
+ u64 total_moved = 0;
+ u64 total_invalidated = 0;
+ u64 total_done = 0;
+ unsigned int total_idle_objs = 0;
+ unsigned int total_evicted_objs = 0;
+ unsigned int total_relocated_objs = 0;
+ unsigned int total_moved_objs = 0;
+ unsigned int total_invalidated_objs = 0;
+ unsigned int total_done_objs = 0;
+ unsigned int id = 0;
+
+ amdgpu_vm_assert_locked(vm);
+
+ spin_lock(&vm->status_lock);
+ seq_puts(m, "\tIdle BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_idle += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_idle_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tEvicted BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_evicted += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_evicted_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tRelocated BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_relocated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_relocated_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tMoved BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_moved += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_moved_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tInvalidated BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_invalidated_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tDone BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ spin_unlock(&vm->status_lock);
+ total_done_objs = id;
+
+ seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
+ total_idle_objs);
+ seq_printf(m, "\tTotal evicted size: %12lld\tobjs:\t%d\n", total_evicted,
+ total_evicted_objs);
+ seq_printf(m, "\tTotal relocated size: %12lld\tobjs:\t%d\n", total_relocated,
+ total_relocated_objs);
+ seq_printf(m, "\tTotal moved size: %12lld\tobjs:\t%d\n", total_moved,
+ total_moved_objs);
+ seq_printf(m, "\tTotal invalidated size: %12lld\tobjs:\t%d\n", total_invalidated,
+ total_invalidated_objs);
+ seq_printf(m, "\tTotal done size: %12lld\tobjs:\t%d\n", total_done,
+ total_done_objs);
+}
+#endif
+
+/**
+ * amdgpu_vm_update_fault_cache - update cached fault info.
+ * @adev: amdgpu device pointer
+ * @pasid: PASID of the VM
+ * @addr: Address of the fault
+ * @status: GPUVM fault status register
+ * @vmhub: which vmhub got the fault
+ *
+ * Cache the fault info for later use by userspace in debugging.
+ */
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub)
+{
+ struct amdgpu_vm *vm;
+ unsigned long flags;
+
+ xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+
+ vm = xa_load(&adev->vm_manager.pasids, pasid);
+ /* Don't update the fault cache if status is 0. In the multiple
+ * fault case, subsequent faults will return a 0 status which is
+ * useless for userspace and replaces the useful fault status, so
+ * only update if status is non-0.
+ */
+ if (vm && status) {
+ vm->fault_info.addr = addr;
+ vm->fault_info.status = status;
+ /*
+ * Update the fault information globally for later usage
+ * when vm could be stale or freed.
+ */
+ adev->vm_manager.fault_info.addr = addr;
+ adev->vm_manager.fault_info.vmhub = vmhub;
+ adev->vm_manager.fault_info.status = status;
+
+ if (AMDGPU_IS_GFXHUB(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else if (AMDGPU_IS_MMHUB0(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else if (AMDGPU_IS_MMHUB1(vmhub)) {
+ vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
+ vm->fault_info.vmhub |=
+ (vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
+ } else {
+ WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
+ }
+ }
+ xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+}
+
+/**
+ * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
+ *
+ * @vm: VM to test against.
+ * @bo: BO to be tested.
+ *
+ * Returns true if the BO shares the dma_resv object with the root PD and is
+ * always guaranteed to be valid inside the VM.
+ */
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
+{
+ return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
+}
+
+void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+ struct amdgpu_task_info *task_info)
+{
+ dev_err(adev->dev,
+ " Process %s pid %d thread %s pid %d\n",
+ task_info->process_name, task_info->tgid,
+ task_info->task.comm, task_info->task.pid);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
new file mode 100644
index 000000000000..15d757c016cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -0,0 +1,691 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+#ifndef __AMDGPU_VM_H__
+#define __AMDGPU_VM_H__
+
+#include <linux/idr.h>
+#include <linux/kfifo.h>
+#include <linux/rbtree.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
+#include <drm/ttm/ttm_bo.h>
+#include <linux/sched/mm.h>
+
+#include "amdgpu_sync.h"
+#include "amdgpu_ring.h"
+#include "amdgpu_ids.h"
+#include "amdgpu_ttm.h"
+
+struct drm_exec;
+
+struct amdgpu_bo_va;
+struct amdgpu_job;
+struct amdgpu_bo_list_entry;
+struct amdgpu_bo_vm;
+
+/*
+ * GPUVM handling
+ */
+
+/* Maximum number of PTEs the hardware can write with one command */
+#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
+
+/* number of entries in page table */
+#define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
+
+#define AMDGPU_PTE_VALID (1ULL << 0)
+#define AMDGPU_PTE_SYSTEM (1ULL << 1)
+#define AMDGPU_PTE_SNOOPED (1ULL << 2)
+
+/* RV+ */
+#define AMDGPU_PTE_TMZ (1ULL << 3)
+
+/* VI only */
+#define AMDGPU_PTE_EXECUTABLE (1ULL << 4)
+
+#define AMDGPU_PTE_READABLE (1ULL << 5)
+#define AMDGPU_PTE_WRITEABLE (1ULL << 6)
+
+#define AMDGPU_PTE_FRAG(x) ((x & 0x1fULL) << 7)
+
+/* TILED for VEGA10, reserved for older ASICs */
+#define AMDGPU_PTE_PRT (1ULL << 51)
+
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE (1ULL << 54)
+
+#define AMDGPU_PTE_LOG (1ULL << 55)
+
+/* PTE is handled as PDE for VEGA10 (Translate Further) */
+#define AMDGPU_PTE_TF (1ULL << 56)
+
+/* MALL noalloc for sienna_cichlid, reserved for older ASICs */
+#define AMDGPU_PTE_NOALLOC (1ULL << 58)
+
+/* PDE Block Fragment Size for VEGA10 */
+#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
+
+/* Flag combination to set no-retry with TF disabled */
+#define AMDGPU_VM_NORETRY_FLAGS (AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | \
+ AMDGPU_PTE_TF)
+
+/* Flag combination to set no-retry with TF enabled */
+#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
+ AMDGPU_PTE_PRT)
+/* For GFX9 */
+#define AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype) ((uint64_t)(mtype) << 57)
+#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10_SHIFT(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_VG10_MASK)) | \
+ AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype))
+
+#define AMDGPU_MTYPE_NC 0
+#define AMDGPU_MTYPE_CC 2
+
+#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \
+ | AMDGPU_PTE_SNOOPED \
+ | AMDGPU_PTE_EXECUTABLE \
+ | AMDGPU_PTE_READABLE \
+ | AMDGPU_PTE_WRITEABLE \
+ | AMDGPU_PTE_MTYPE_VG10(0ULL, AMDGPU_MTYPE_CC))
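+
+/*
+ * For example, AMDGPU_PTE_MTYPE_VG10(0ULL, AMDGPU_MTYPE_CC) clears bits
+ * 57-58 of the flags and sets them to 2, i.e. it contributes 2ULL << 57 to
+ * the PTE value.
+ */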
+
+/* gfx10 */
+#define AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype) ((uint64_t)(mtype) << 48)
+#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10_SHIFT(7ULL)
+#define AMDGPU_PTE_MTYPE_NV10(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_NV10_MASK)) | \
+ AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype))
+
+/* gfx12 */
+#define AMDGPU_PTE_PRT_GFX12 (1ULL << 56)
+#define AMDGPU_PTE_PRT_FLAG(adev) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PTE_PRT_GFX12 : AMDGPU_PTE_PRT)
+
+#define AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype) ((uint64_t)(mtype) << 54)
+#define AMDGPU_PTE_MTYPE_GFX12_MASK AMDGPU_PTE_MTYPE_GFX12_SHIFT(3ULL)
+#define AMDGPU_PTE_MTYPE_GFX12(flags, mtype) \
+ (((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_GFX12_MASK)) | \
+ AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype))
+
+#define AMDGPU_PTE_DCC (1ULL << 58)
+#define AMDGPU_PTE_IS_PTE (1ULL << 63)
+
+/* PDE Block Fragment Size for gfx v12 */
+#define AMDGPU_PDE_BFS_GFX12(a) ((uint64_t)((a) & 0x1fULL) << 58)
+#define AMDGPU_PDE_BFS_FLAG(adev, a) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_BFS_GFX12(a) : AMDGPU_PDE_BFS(a))
+/* PDE is handled as PTE for gfx v12 */
+#define AMDGPU_PDE_PTE_GFX12 (1ULL << 63)
+#define AMDGPU_PDE_PTE_FLAG(adev) \
+ ((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_PTE_GFX12 : AMDGPU_PDE_PTE)
+
+/* How to program VM fault handling */
+#define AMDGPU_VM_FAULT_STOP_NEVER 0
+#define AMDGPU_VM_FAULT_STOP_FIRST 1
+#define AMDGPU_VM_FAULT_STOP_ALWAYS 2
+
+/* How much VRAM is reserved for page tables */
+#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20)
+
+/*
+ * max number of VMHUB
+ * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1
+ */
+#define AMDGPU_MAX_VMHUBS 13
+#define AMDGPU_GFXHUB_START 0
+#define AMDGPU_MMHUB0_START 8
+#define AMDGPU_MMHUB1_START 12
+#define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x))
+#define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x))
+#define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x))
+
+#define AMDGPU_IS_GFXHUB(x) ((x) >= AMDGPU_GFXHUB_START && (x) < AMDGPU_MMHUB0_START)
+#define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START)
+#define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS)
+
+/* Reserve space at top/bottom of address space for kernel use */
+#define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20)
+#define AMDGPU_VA_RESERVED_CSA_START(adev) (((adev)->vm_manager.max_pfn \
+ << AMDGPU_GPU_PAGE_SHIFT) \
+ - AMDGPU_VA_RESERVED_CSA_SIZE)
+#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
+#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
+ - AMDGPU_VA_RESERVED_SEQ64_SIZE)
+#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
+#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
+ - AMDGPU_VA_RESERVED_TRAP_SIZE)
+#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
+#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \
+ AMDGPU_VA_RESERVED_SEQ64_SIZE + \
+ AMDGPU_VA_RESERVED_CSA_SIZE)
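+
+/*
+ * Taken together, the reserved region at the top of the address space is
+ * laid out (from the top downwards) as 2 MiB of CSA, 2 MiB of seq64 and
+ * 8 KiB of trap space; AMDGPU_VA_RESERVED_TOP covers all three.
+ */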
+
+/* See vm_update_mode */
+#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
+#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
+
+/* VMPT level enumeration; the hierarchy is:
+ * PDB2->PDB1->PDB0->PTB
+ */
+enum amdgpu_vm_level {
+ AMDGPU_VM_PDB2,
+ AMDGPU_VM_PDB1,
+ AMDGPU_VM_PDB0,
+ AMDGPU_VM_PTB
+};
+
+/* base structure for tracking BO usage in a VM */
+struct amdgpu_vm_bo_base {
+ /* constant after initialization */
+ struct amdgpu_vm *vm;
+ struct amdgpu_bo *bo;
+
+ /* protected by bo being reserved */
+ struct amdgpu_vm_bo_base *next;
+
+ /* protected by vm status_lock */
+ struct list_head vm_status;
+
+ /* if the bo is counted as shared in mem stats;
+ * protected by vm status_lock */
+ bool shared;
+
+ /* protected by the BO being reserved */
+ bool moved;
+};
+
+/* provided by hw blocks that can write ptes, e.g., sdma */
+struct amdgpu_vm_pte_funcs {
+ /* number of dw to reserve per operation */
+ unsigned copy_pte_num_dw;
+
+ /* copy pte entries from GART */
+ void (*copy_pte)(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count);
+
+ /* write pte one entry at a time with addr mapping */
+ void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr);
+ /* for linear pte/pde updates without addr mapping */
+ void (*set_pte_pde)(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags);
+};
+
+struct amdgpu_task_info {
+ struct drm_wedge_task_info task;
+ char process_name[TASK_COMM_LEN];
+ pid_t tgid;
+ struct kref refcount;
+};
+
+/**
+ * struct amdgpu_vm_update_params
+ *
+ * Encapsulate some VM table update parameters to reduce
+ * the number of function parameters
+ *
+ */
+struct amdgpu_vm_update_params {
+
+ /**
+ * @adev: amdgpu device we do this update for
+ */
+ struct amdgpu_device *adev;
+
+ /**
+ * @vm: optional amdgpu_vm we do this update for
+ */
+ struct amdgpu_vm *vm;
+
+ /**
+ * @immediate: if changes should be made immediately
+ */
+ bool immediate;
+
+ /**
+ * @unlocked: true if the root BO is not locked
+ */
+ bool unlocked;
+
+ /**
+ * @pages_addr:
+ *
+ * DMA addresses to use for mapping
+ */
+ dma_addr_t *pages_addr;
+
+ /**
+ * @job: job to use for hw submission
+ */
+ struct amdgpu_job *job;
+
+ /**
+ * @num_dw_left: number of dw left for the IB
+ */
+ unsigned int num_dw_left;
+
+ /**
+ * @needs_flush: true whenever we need to invalidate the TLB
+ */
+ bool needs_flush;
+
+ /**
+ * @allow_override: true for memory that is not uncached: allows MTYPE
+ * to be overridden for NUMA local memory.
+ */
+ bool allow_override;
+
+ /**
+ * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
+ */
+ struct list_head tlb_flush_waitlist;
+};
+
+struct amdgpu_vm_update_funcs {
+ int (*map_table)(struct amdgpu_bo_vm *bo);
+ int (*prepare)(struct amdgpu_vm_update_params *p,
+ struct amdgpu_sync *sync, u64 k_job_id);
+ int (*update)(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr, uint64_t flags);
+ int (*commit)(struct amdgpu_vm_update_params *p,
+ struct dma_fence **fence);
+};
+
+struct amdgpu_vm_fault_info {
+ /* fault address */
+ uint64_t addr;
+ /* fault status register */
+ uint32_t status;
+ /* which vmhub? gfxhub, mmhub, etc. */
+ unsigned int vmhub;
+};
+
+struct amdgpu_mem_stats {
+ struct drm_memory_stats drm;
+
+ /* buffers that requested this placement but are currently evicted */
+ uint64_t evicted;
+};
+
+struct amdgpu_vm {
+ /* tree of virtual addresses mapped */
+ struct rb_root_cached va;
+
+ /* Lock to prevent eviction while we are updating page tables
+ * use vm_eviction_lock/unlock(vm)
+ */
+ struct mutex eviction_lock;
+ bool evicting;
+ unsigned int saved_flags;
+
+ /* Lock to protect vm_bo add/del/move on all lists of vm */
+ spinlock_t status_lock;
+
+ /* Memory statistics for this vm, protected by status_lock */
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for either
+ * PDs, PTs or per VM BOs. The state transitions are:
+ *
+ * evicted -> relocated (PDs, PTs) or moved (per VM BOs) -> idle
+ */
+
+ /* Per-VM and PT BOs that need a validation */
+ struct list_head evicted;
+
+ /* PT BOs which were relocated and whose parent needs an update */
+ struct list_head relocated;
+
+ /* per VM BOs moved, but not yet updated in the PT */
+ struct list_head moved;
+
+ /* All BOs of this VM not currently in the state machine */
+ struct list_head idle;
+
+ /*
+ * The following lists contain amdgpu_vm_bo_base objects for BOs which
+ * have their own dma_resv object and do not depend on the root PD. Their
+ * state transitions are:
+ *
+ * evicted_user or invalidated -> done
+ */
+
+ /* BOs for user mode queues that need a validation */
+ struct list_head evicted_user;
+
+ /* regular invalidated BOs, but not yet updated in the PT */
+ struct list_head invalidated;
+
+ /* BOs which were invalidated and have already been updated in the PTs */
+ struct list_head done;
+
+ /*
+ * This list contains amdgpu_bo_va_mapping objects which have been freed
+ * but not updated in the PTs
+ */
+ struct list_head freed;
+
+ /* contains the page directory */
+ struct amdgpu_vm_bo_base root;
+ struct dma_fence *last_update;
+
+ /* Scheduler entities for page table updates */
+ struct drm_sched_entity immediate;
+ struct drm_sched_entity delayed;
+
+ /* Last finished delayed update */
+ atomic64_t tlb_seq;
+ struct dma_fence *last_tlb_flush;
+ atomic64_t kfd_last_flushed_seq;
+ uint64_t tlb_fence_context;
+
+ /* How many times we had to re-generate the page tables */
+ uint64_t generation;
+
+ /* Last unlocked submission to the scheduler entities */
+ struct dma_fence *last_unlocked;
+
+ unsigned int pasid;
+ struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS];
+
+ /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
+ bool use_cpu_for_update;
+
+ /* Functions to use for VM table updates */
+ const struct amdgpu_vm_update_funcs *update_funcs;
+
+ /* Up to 128 pending retry page faults */
+ DECLARE_KFIFO(faults, u64, 128);
+
+ /* Points to the KFD process VM info */
+ struct amdkfd_process_info *process_info;
+
+ /* List node in amdkfd_process_info.vm_list_head */
+ struct list_head vm_list_node;
+
+ /* Valid while the PD is reserved or fenced */
+ uint64_t pd_phys_addr;
+
+ /* Some basic info about the task */
+ struct amdgpu_task_info *task_info;
+
+ /* Store positions of group of BOs */
+ struct ttm_lru_bulk_move lru_bulk_move;
+ /* Flag to indicate if VM is used for compute */
+ bool is_compute_context;
+
+ /* Memory partition number, -1 means any partition */
+ int8_t mem_id;
+
+ /* cached fault info */
+ struct amdgpu_vm_fault_info fault_info;
+};
+
+struct amdgpu_vm_manager {
+ /* Handling of VMIDs */
+ struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS];
+ unsigned int first_kfd_vmid;
+ bool concurrent_flush;
+
+ uint64_t max_pfn;
+ uint32_t num_level;
+ uint32_t block_size;
+ uint32_t fragment_size;
+ enum amdgpu_vm_level root_level;
+ /* vram base address for page table entry */
+ u64 vram_base_offset;
+ /* vm pte handling */
+ const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
+ struct drm_gpu_scheduler *vm_pte_scheds[AMDGPU_MAX_RINGS];
+ unsigned vm_pte_num_scheds;
+ struct amdgpu_ring *page_fault;
+
+ /* partial resident texture handling */
+ spinlock_t prt_lock;
+ atomic_t num_prt_users;
+
+ /* controls how VM page tables are updated for Graphics and Compute.
+ * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU
+ * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
+ */
+ int vm_update_mode;
+
+ /* PASID to VM mapping, will be used in interrupt context to
+ * look up VM of a page fault
+ */
+ struct xarray pasids;
+ /* Global registration of recent page fault information */
+ struct amdgpu_vm_fault_info fault_info;
+};
+
+struct amdgpu_bo_va_mapping;
+
+#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
+#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
+#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
+
+extern const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs;
+extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs;
+
+void amdgpu_vm_manager_init(struct amdgpu_device *adev);
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+
+long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id, uint32_t pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
+int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
+bool amdgpu_vm_ready(struct amdgpu_vm *vm);
+uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket,
+ int (*callback)(void *p, struct amdgpu_bo *bo),
+ void *param);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
+int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, bool immediate);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask);
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ bool clear);
+bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *new_res, int sign);
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted);
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr, uint64_t offset,
+ uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr, uint64_t offset,
+ uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va,
+ uint64_t addr);
+int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint64_t saddr, uint64_t size);
+struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
+ uint64_t addr);
+void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);
+void amdgpu_vm_bo_del(struct amdgpu_device *adev,
+ struct amdgpu_bo_va *bo_va);
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
+ uint32_t fragment_size_default, unsigned max_level,
+ unsigned max_bits);
+int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
+void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
+
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid);
+
+struct amdgpu_task_info *
+amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm);
+
+void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info);
+
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
+ u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
+ bool write_fault);
+
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
+
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]);
+
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate);
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id);
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry);
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags);
+void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params);
+
+#if defined(CONFIG_DEBUG_FS)
+void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
+#endif
+
+int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+
+bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+
+/**
+ * amdgpu_vm_tlb_seq - return tlb flush sequence number
+ * @vm: the amdgpu_vm structure to query
+ *
+ * Returns the TLB flush sequence number which indicates that the VM TLBs need
+ * to be invalidated whenever the sequence number changes.
+ */
+static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
+{
+ unsigned long flags;
+ spinlock_t *lock;
+
+ /*
+ * Workaround to stop racing between the fence signaling and handling
+ * the cb. The lock is static after initially setting it up, just make
+ * sure that the dma_fence structure isn't freed up.
+ */
+ rcu_read_lock();
+ lock = vm->last_tlb_flush->lock;
+ rcu_read_unlock();
+
+ spin_lock_irqsave(lock, flags);
+ spin_unlock_irqrestore(lock, flags);
+
+ return atomic64_read(&vm->tlb_seq);
+}
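+
+/*
+ * Usage sketch: callers typically cache the sequence number of their last
+ * flush and compare it against the current value to decide whether another
+ * TLB flush is needed before a mapping may be used, for example:
+ *
+ *   flush_needed = atomic64_read(&vm->kfd_last_flushed_seq) !=
+ *                  amdgpu_vm_tlb_seq(vm);
+ *
+ * (kfd_last_flushed_seq is just one such cached value, see above.)
+ */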
+
+/*
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+ mutex_lock(&vm->eviction_lock);
+ vm->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline bool amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+ if (mutex_trylock(&vm->eviction_lock)) {
+ vm->saved_flags = memalloc_noreclaim_save();
+ return true;
+ }
+ return false;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+ memalloc_noreclaim_restore(vm->saved_flags);
+ mutex_unlock(&vm->eviction_lock);
+}
+
+void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
+ unsigned int pasid,
+ uint64_t addr,
+ uint32_t status,
+ unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
+
+void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
+ struct amdgpu_task_info *task_info);
+
+#define amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &(bo_va)->valids, list)
+#define amdgpu_vm_bo_va_for_each_invalid_mapping(bo_va, mapping) \
+ list_for_each_entry(mapping, &(bo_va)->invalids, list)
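+
+/*
+ * Usage sketch (dump_mapping() is a hypothetical helper; the relevant locks
+ * are assumed to be held by the caller):
+ *
+ *   struct amdgpu_bo_va_mapping *mapping;
+ *
+ *   amdgpu_vm_bo_va_for_each_valid_mapping(bo_va, mapping)
+ *           dump_mapping(mapping);
+ */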
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
new file mode 100644
index 000000000000..22e2e5b47341
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu_vm.h"
+#include "amdgpu_object.h"
+#include "amdgpu_trace.h"
+
+/**
+ * amdgpu_vm_cpu_map_table - make sure new PDs/PTs are kmapped
+ *
+ * @table: newly allocated or validated PD/PT
+ */
+static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
+{
+ table->bo.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ return amdgpu_bo_kmap(&table->bo, NULL);
+}
+
+/**
+ * amdgpu_vm_cpu_prepare - prepare page table update with the CPU
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @sync: sync obj with fences to wait on
+ * @k_job_id: the id for tracing/debug purposes
+ *
+ * Returns:
+ * Negative errno on failure, 0 for success.
+ */
+static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
+ struct amdgpu_sync *sync,
+ u64 k_job_id)
+{
+ if (!sync)
+ return 0;
+
+ return amdgpu_sync_wait(sync, true);
+}
+
+/**
+ * amdgpu_vm_cpu_update - helper to update page tables via CPU
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @vmbo: PD/PT to update
+ * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Write count number of PT/PD entries directly.
+ */
+static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo_vm *vmbo, uint64_t pe,
+ uint64_t addr, unsigned count, uint32_t incr,
+ uint64_t flags)
+{
+ unsigned int i;
+ uint64_t value;
+ long r;
+
+ r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ true, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
+ pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
+
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
+
+ for (i = 0; i < count; i++) {
+ value = p->pages_addr ?
+ amdgpu_vm_map_gart(p->pages_addr, addr) :
+ addr;
+ amdgpu_gmc_set_pte_pde(p->adev, (void *)(uintptr_t)pe,
+ i, value, flags);
+ addr += incr;
+ }
+ return 0;
+}
+
+/**
+ * amdgpu_vm_cpu_commit - commit page table update to the HW
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @fence: unused
+ *
+ * Make sure that the hardware sees the page table updates.
+ */
+static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
+ struct dma_fence **fence)
+{
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
+ mb();
+ amdgpu_device_flush_hdp(p->adev, NULL);
+ return 0;
+}
+
+const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = {
+ .map_table = amdgpu_vm_cpu_map_table,
+ .prepare = amdgpu_vm_cpu_prepare,
+ .update = amdgpu_vm_cpu_update,
+ .commit = amdgpu_vm_cpu_commit
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
new file mode 100644
index 000000000000..f794fb1cc06e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -0,0 +1,976 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_job.h"
+
+/*
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt_dfs_safe
+ */
+struct amdgpu_vm_pt_cursor {
+ uint64_t pfn;
+ struct amdgpu_vm_bo_base *parent;
+ struct amdgpu_vm_bo_base *entry;
+ unsigned int level;
+};
+
+/**
+ * amdgpu_vm_pt_level_shift - return the addr shift for each level
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The number of bits the pfn needs to be right shifted for a level.
+ */
+static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ switch (level) {
+ case AMDGPU_VM_PDB2:
+ case AMDGPU_VM_PDB1:
+ case AMDGPU_VM_PDB0:
+ return 9 * (AMDGPU_VM_PDB0 - level) +
+ adev->vm_manager.block_size;
+ case AMDGPU_VM_PTB:
+ return 0;
+ default:
+ return ~0;
+ }
+}
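+
+/*
+ * Example (assuming the common block_size of 9, i.e. 512 entries per PTB):
+ * the shifts are PTB -> 0, PDB0 -> 9, PDB1 -> 18 and PDB2 -> 27, so a full
+ * four level hierarchy indexes 36 bits of page frame number, which with
+ * 4 KiB pages corresponds to a 48-bit virtual address space.
+ */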
+
+/**
+ * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The number of entries in a page directory or page table.
+ */
+static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ unsigned int shift;
+
+ shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
+ if (level == adev->vm_manager.root_level)
+ /* For the root directory */
+ return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
+ >> shift;
+ else if (level != AMDGPU_VM_PTB)
+ /* Everything in between */
+ return 512;
+
+ /* For the page tables on the leaves */
+ return AMDGPU_VM_PTE_COUNT(adev);
+}
+
+/**
+ * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The mask to extract the entry number of a PD/PT from an address.
+ */
+static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ if (level <= adev->vm_manager.root_level)
+ return 0xffffffff;
+ else if (level != AMDGPU_VM_PTB)
+ return 0x1ff;
+ else
+ return AMDGPU_VM_PTE_COUNT(adev) - 1;
+}
+
+/**
+ * amdgpu_vm_pt_size - returns the size of the page table in bytes
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The size of the BO for a page directory or page table in bytes.
+ */
+static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
+}
+
+/**
+ * amdgpu_vm_pt_parent - get the parent page directory
+ *
+ * @pt: child page table
+ *
+ * Helper to get the parent entry for the child page table. NULL if we are at
+ * the root page directory.
+ */
+static struct amdgpu_vm_bo_base *
+amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
+{
+ struct amdgpu_bo *parent = pt->bo->parent;
+
+ if (!parent)
+ return NULL;
+
+ return parent->vm_bo;
+}
+
+/**
+ * amdgpu_vm_pt_start - start PD/PT walk
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start address of the walk
+ * @cursor: state to initialize
+ *
+ * Initialize an amdgpu_vm_pt_cursor to start a walk.
+ */
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, uint64_t start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ cursor->pfn = start;
+ cursor->parent = NULL;
+ cursor->entry = &vm->root;
+ cursor->level = adev->vm_manager.root_level;
+}
+
+/**
+ * amdgpu_vm_pt_descendant - go to child node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the child node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ unsigned int mask, shift, idx;
+
+ if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
+ !cursor->entry->bo)
+ return false;
+
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
+ shift = amdgpu_vm_pt_level_shift(adev, cursor->level);
+
+ ++cursor->level;
+ idx = (cursor->pfn >> shift) & mask;
+ cursor->parent = cursor->entry;
+ cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_sibling - go to sibling node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the sibling node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+
+ unsigned int shift, num_entries;
+ struct amdgpu_bo_vm *parent;
+
+ /* Root doesn't have a sibling */
+ if (!cursor->parent)
+ return false;
+
+ /* Go to our parents and see if we got a sibling */
+ shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
+ num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
+ parent = to_amdgpu_bo_vm(cursor->parent->bo);
+
+ if (cursor->entry == &parent->entries[num_entries - 1])
+ return false;
+
+ cursor->pfn += 1ULL << shift;
+ cursor->pfn &= ~((1ULL << shift) - 1);
+ ++cursor->entry;
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_ancestor - go to parent node
+ *
+ * @cursor: current state
+ *
+ * Walk to the parent node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->parent)
+ return false;
+
+ --cursor->level;
+ cursor->entry = cursor->parent;
+ cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_next - get next PD/PT in the hierarchy
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next node.
+ */
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ /* First try a newborn child */
+ if (amdgpu_vm_pt_descendant(adev, cursor))
+ return;
+
+ /* If that didn't work, try to find a sibling */
+ while (!amdgpu_vm_pt_sibling(adev, cursor)) {
+ /* No sibling, go to our parents and grandparents */
+ if (!amdgpu_vm_pt_ancestor(cursor)) {
+ cursor->pfn = ~0ll;
+ return;
+ }
+ }
+}
+
+/**
+ * amdgpu_vm_pt_first_dfs - start a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @vm: amdgpu_vm structure
+ * @start: optional cursor to start with
+ * @cursor: state to initialize
+ *
+ * Starts a depth-first traversal of the PD/PT tree.
+ */
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (start)
+ *cursor = *start;
+ else
+ amdgpu_vm_pt_start(adev, vm, 0, cursor);
+
+ while (amdgpu_vm_pt_descendant(adev, cursor))
+ ;
+}
+
+/**
+ * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
+ *
+ * @start: starting point for the search
+ * @entry: current entry
+ *
+ * Returns:
+ * True when the search should continue, false otherwise.
+ */
+static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
+ struct amdgpu_vm_bo_base *entry)
+{
+ return entry && (!start || entry != start->entry);
+}
+
+/**
+ * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @cursor: current state
+ *
+ * Move the cursor to the next node in a depth-first search.
+ */
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->entry)
+ return;
+
+ if (!cursor->parent)
+ cursor->entry = NULL;
+ else if (amdgpu_vm_pt_sibling(adev, cursor))
+ while (amdgpu_vm_pt_descendant(adev, cursor))
+ ;
+ else
+ amdgpu_vm_pt_ancestor(cursor);
+}
+
+/*
+ * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
+ */
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
+ for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
+ amdgpu_vm_pt_continue_dfs((start), (entry)); \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
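+
+/*
+ * Usage sketch: children are always visited before their parents, so the
+ * walk can be used to tear down the hierarchy bottom-up, as
+ * amdgpu_vm_pt_free_root() below does:
+ *
+ *   for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
+ *           if (entry)
+ *                   amdgpu_vm_pt_free(entry);
+ */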
+
+/**
+ * amdgpu_vm_pt_clear - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to clear BO from
+ * @vmbo: BO to clear
+ * @immediate: use an immediate update
+ *
+ * Root PD needs to be reserved when calling this.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate)
+{
+ unsigned int level = adev->vm_manager.root_level;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_vm_update_params params;
+ struct amdgpu_bo *ancestor = &vmbo->bo;
+ unsigned int entries;
+ struct amdgpu_bo *bo = &vmbo->bo;
+ uint64_t addr;
+ int r, idx;
+
+ /* Figure out our place in the hierarchy */
+ if (ancestor->parent) {
+ ++level;
+ while (ancestor->parent->parent) {
+ ++level;
+ ancestor = ancestor->parent;
+ }
+ }
+
+ entries = amdgpu_bo_size(bo) / 8;
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ return r;
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ r = vm->update_funcs->map_table(vmbo);
+ if (r)
+ goto exit;
+
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.vm = vm;
+ params.immediate = immediate;
+
+ r = vm->update_funcs->prepare(&params, NULL,
+ AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR);
+ if (r)
+ goto exit;
+
+ addr = 0;
+
+ uint64_t value = 0, flags = 0;
+ if (adev->asic_type >= CHIP_VEGA10) {
+ if (level != AMDGPU_VM_PTB) {
+ /* Handle leaf PDEs as PTEs */
+ flags |= AMDGPU_PDE_PTE_FLAG(adev);
+ amdgpu_gmc_get_vm_pde(adev, level,
+ &value, &flags);
+ } else {
+ /* Workaround for fault priority problem on GMC9 */
+ flags = AMDGPU_PTE_EXECUTABLE;
+ }
+ }
+
+ r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
+ value, flags);
+ if (r)
+ goto exit;
+
+ r = vm->update_funcs->commit(&params, NULL);
+exit:
+ drm_dev_exit(idx);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_create - create bo for PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requesting vm
+ * @level: the page table level
+ * @immediate: use an immediate update
+ * @vmbo: pointer to the buffer object pointer
+ * @xcp_id: GPU partition id
+ */
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id)
+{
+ struct amdgpu_bo_param bp;
+ unsigned int num_entries;
+
+ memset(&bp, 0, sizeof(bp));
+
+ bp.size = amdgpu_vm_pt_size(adev, level);
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+
+ if (!adev->gmc.is_app_apu)
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ else
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+
+ bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ if (level < AMDGPU_VM_PTB)
+ num_entries = amdgpu_vm_pt_num_entries(adev, level);
+ else
+ num_entries = 0;
+
+ bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
+
+ if (vm->use_cpu_for_update)
+ bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ bp.type = ttm_bo_type_kernel;
+ bp.no_wait_gpu = immediate;
+ bp.xcp_id_plus1 = xcp_id + 1;
+
+ if (vm->root.bo)
+ bp.resv = vm->root.bo->tbo.base.resv;
+
+ return amdgpu_bo_create_vm(adev, &bp, vmbo);
+}
+
+/**
+ * amdgpu_vm_pt_alloc - Allocate a specific page table
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to allocate page tables for
+ * @cursor: Which page table to allocate
+ * @immediate: use an immediate update
+ *
+ * Make sure a specific page table or directory is allocated.
+ *
+ * Returns:
+ * 1 if page table needed to be allocated, 0 if page table was already
+ * allocated, negative errno if an error occurred.
+ */
+static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *cursor,
+ bool immediate)
+{
+ struct amdgpu_vm_bo_base *entry = cursor->entry;
+ struct amdgpu_bo *pt_bo;
+ struct amdgpu_bo_vm *pt;
+ int r;
+
+ if (entry->bo)
+ return 0;
+
+ amdgpu_vm_eviction_unlock(vm);
+ r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
+ vm->root.bo->xcp_id);
+ amdgpu_vm_eviction_lock(vm);
+ if (r)
+ return r;
+
+ /* Keep a reference to the parent directory so the PDs/PTs are
+ * not freed up in the wrong order.
+ */
+ pt_bo = &pt->bo;
+ pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
+ amdgpu_vm_bo_base_init(entry, vm, pt_bo);
+ r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
+ if (r)
+ goto error_free_pt;
+
+ return 0;
+
+error_free_pt:
+ amdgpu_bo_unref(&pt_bo);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_free - free one PD/PT
+ *
+ * @entry: PDE to free
+ */
+static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+{
+ if (!entry->bo)
+ return;
+
+ amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
+ entry->bo->vm_bo = NULL;
+ ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
+
+ spin_lock(&entry->vm->status_lock);
+ list_del(&entry->vm_status);
+ spin_unlock(&entry->vm->status_lock);
+ amdgpu_bo_unref(&entry->bo);
+}
+
+/**
+ * amdgpu_vm_pt_free_list - free PD/PT levels
+ *
+ * @adev: amdgpu device structure
+ * @params: see amdgpu_vm_update_params definition
+ *
+ * Free the page directory objects saved in the flush list
+ */
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params)
+{
+ struct amdgpu_vm_bo_base *entry, *next;
+ bool unlocked = params->unlocked;
+
+ if (list_empty(&params->tlb_flush_waitlist))
+ return;
+
+ /*
+ * Unlocked unmaps only clear page table leaves; freeing a page table
+ * entry here would be a bug, hence the warning.
+ */
+ WARN_ON(unlocked);
+
+ list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
+ amdgpu_vm_pt_free(entry);
+}
+
+/**
+ * amdgpu_vm_pt_add_list - add PD/PT level to the flush list
+ *
+ * @params: parameters for the update
+ * @cursor: first PT entry to start the depth-first search from, must not be NULL
+ *
+ * This list will be freed after TLB flush.
+ */
+static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ struct amdgpu_vm_pt_cursor seek;
+ struct amdgpu_vm_bo_base *entry;
+
+ spin_lock(&params->vm->status_lock);
+ for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
+ if (entry && entry->bo)
+ list_move(&entry->vm_status, &params->tlb_flush_waitlist);
+ }
+
+ /* enter start node now */
+ list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
+ spin_unlock(&params->vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_pt_free_root - free root PD
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * Free the root page directory and everything below it.
+ */
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+ if (entry)
+ amdgpu_vm_pt_free(entry);
+ }
+}
+
+/**
+ * amdgpu_vm_pde_update - update a single level in the hierarchy
+ *
+ * @params: parameters for the update
+ * @entry: entry to update
+ *
+ * Makes sure the requested entry in parent is up to date.
+ */
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry)
+{
+ struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
+ struct amdgpu_bo *bo, *pbo;
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t pde, pt, flags;
+ unsigned int level;
+
+ if (WARN_ON(!parent))
+ return -EINVAL;
+
+ bo = parent->bo;
+ for (level = 0, pbo = bo->parent; pbo; ++level)
+ pbo = pbo->parent;
+
+ level += params->adev->vm_manager.root_level;
+ amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
+ pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
+ return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
+ 1, 0, flags);
+}
+
+/**
+ * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: pointer to PTE flags
+ *
+ * Update PTE no-retry flags when TF is enabled.
+ */
+static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
+ uint64_t *flags)
+{
+ /*
+ * Update no-retry flags with the corresponding TF
+ * no-retry combination.
+ */
+ if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
+ *flags &= ~AMDGPU_VM_NORETRY_FLAGS;
+ *flags |= adev->gmc.noretry_flags;
+ }
+}
+
+/*
+ * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
+ *
+ * Make sure to set the right flags for the PTEs at the desired level.
+ */
+static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
+ struct amdgpu_bo_vm *pt,
+ unsigned int level,
+ uint64_t pe, uint64_t addr,
+ unsigned int count, uint32_t incr,
+ uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+
+ if (level != AMDGPU_VM_PTB) {
+ flags |= AMDGPU_PDE_PTE_FLAG(params->adev);
+ amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
+
+ } else if (adev->asic_type >= CHIP_VEGA10 &&
+ !(flags & AMDGPU_PTE_VALID) &&
+ !(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) {
+
+ /* Workaround for fault priority problem on GMC9 */
+ flags |= AMDGPU_PTE_EXECUTABLE;
+ }
+
+ /*
+ * Update no-retry flags to use the no-retry flag combination
+ * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
+ * does not work when TF is enabled. So, replace them with
+ * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
+ * all cases.
+ */
+ if (level == AMDGPU_VM_PTB)
+ amdgpu_vm_pte_update_noretry_flags(adev, &flags);
+
+ /* APUs mapping system memory may need different MTYPEs on different
+ * NUMA nodes. Only do this for contiguous ranges that can be assumed
+ * to be on the same NUMA node.
+ */
+ if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
+ adev->gmc.gmc_funcs->override_vm_pte_flags &&
+ num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
+ amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);
+
+ params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
+ flags);
+}
+
+/**
+ * amdgpu_vm_pte_fragment - get fragment for PTEs
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @flags: hw mapping flags
+ * @frag: resulting fragment size
+ * @frag_end: end of this fragment
+ *
+ * Returns the first possible fragment for the start and end address.
+ */
+static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end, uint64_t flags,
+ unsigned int *frag, uint64_t *frag_end)
+{
+ /**
+ * The MC L1 TLB supports variable sized pages, based on a fragment
+ * field in the PTE. When this field is set to a non-zero value, page
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+ * flags are considered valid for all PTEs within the fragment range
+ * and corresponding mappings are assumed to be physically contiguous.
+ *
+ * The L1 TLB can store a single PTE for the whole fragment,
+ * significantly increasing the space available for translation
+ * caching. This leads to large improvements in throughput when the
+ * TLB is under pressure.
+ *
+ * The L2 TLB distributes small and large fragments into two
+ * asymmetric partitions. The large fragment cache is significantly
+ * larger. Thus, we try to use large fragments wherever possible.
+ * Userspace can support this by aligning virtual base address and
+ * allocation size to the fragment size.
+ *
+ * Starting with Vega10 the fragment size only controls the L1. The L2
+ * is now directly fed with small/huge/giant pages from the walker.
+ */
+ unsigned int max_frag;
+
+ if (params->adev->asic_type < CHIP_VEGA10)
+ max_frag = params->adev->vm_manager.fragment_size;
+ else
+ max_frag = 31;
+
+ /* system pages are not contiguous */
+ if (params->pages_addr) {
+ *frag = 0;
+ *frag_end = end;
+ return;
+ }
+
+ /* This intentionally wraps around if no bit is set */
+ *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
+ if (*frag >= max_frag) {
+ *frag = max_frag;
+ *frag_end = end & ~((1ULL << max_frag) - 1);
+ } else {
+ *frag_end = start + (1 << *frag);
+ }
+}
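+
+/*
+ * Worked example (assuming max_frag >= 10, which is always the case from
+ * Vega10 on, and no pages_addr): for start == 0x400 and end == 0x800 (GPU
+ * page numbers), ffs(start) - 1 == 10 and fls64(end - start) - 1 == 10, so
+ * *frag == 10 (a 4 MiB fragment with 4 KiB pages) and *frag_end == 0x800.
+ */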
+
+/**
+ * amdgpu_vm_ptes_update - make sure that page tables are valid
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to, the next dst inside the function
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end.
+ *
+ * Returns:
+ * 0 for success, negative errno for failure.
+ */
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+ struct amdgpu_vm_pt_cursor cursor;
+ uint64_t frag_start = start, frag_end;
+ unsigned int frag;
+ int r;
+
+ /* figure out the initial fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
+ &frag_end);
+
+ /* walk over the address space and update the PTs */
+ amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
+ while (cursor.pfn < end) {
+ unsigned int shift, parent_shift, mask;
+ uint64_t incr, entry_end, pe_start;
+ struct amdgpu_bo *pt;
+
+ if (!params->unlocked) {
+ /* make sure that the page tables covering the
+ * address range are actually allocated
+ */
+ r = amdgpu_vm_pt_alloc(params->adev, params->vm,
+ &cursor, params->immediate);
+ if (r)
+ return r;
+ }
+
+ shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
+ parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
+ if (params->unlocked) {
+ /* Unlocked updates are only allowed on the leaves */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (adev->asic_type < CHIP_VEGA10 &&
+ (flags & AMDGPU_PTE_VALID)) {
+ /* No huge page support before GMC v9 */
+ if (cursor.level != AMDGPU_VM_PTB) {
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
+ return -ENOENT;
+ continue;
+ }
+ } else if (frag < shift) {
+ /* We can't use this level when the fragment size is
+ * smaller than the address shift. Go to the next
+ * child entry and try again.
+ */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (frag >= parent_shift) {
+ /* If the fragment size is even larger than the parent
+ * shift we should go up one level and check it again.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+ continue;
+ }
+
+ pt = cursor.entry->bo;
+ if (!pt) {
+ /* We need all PDs and PTs for mapping something, */
+ if (flags & AMDGPU_PTE_VALID)
+ return -ENOENT;
+
+ /* but unmapping something can happen at a higher
+ * level.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+
+ pt = cursor.entry->bo;
+ shift = parent_shift;
+ frag_end = max(frag_end, ALIGN(frag_start + 1,
+ 1ULL << shift));
+ }
+
+ /* Looks good so far, calculate parameters for the update */
+ incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
+ pe_start = ((cursor.pfn >> shift) & mask) * 8;
+
+ if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
+ /*
+ * An unlocked unmap from the MMU notifier may hit a huge page whose
+ * leaf is a PDE entry; only clear that one entry, then search again
+ * for the next PDE or PTE leaf.
+ */
+ entry_end = 1ULL << shift;
+ else
+ entry_end = ((uint64_t)mask + 1) << shift;
+ entry_end += cursor.pfn & ~(entry_end - 1);
+ entry_end = min(entry_end, end);
+
+ do {
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t upd_end = min(entry_end, frag_end);
+ unsigned int nptes = (upd_end - frag_start) >> shift;
+ uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
+
+ /* This can happen when we set higher level PDs to
+ * silent to stop fault floods.
+ */
+ nptes = max(nptes, 1u);
+
+ trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
+ min(nptes, 32u), dst, incr,
+ upd_flags,
+ vm->task_info ? vm->task_info->tgid : 0,
+ vm->immediate.fence_context);
+ amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
+ cursor.level, pe_start, dst,
+ nptes, incr, upd_flags);
+
+ pe_start += nptes * 8;
+ dst += nptes * incr;
+
+ frag_start = upd_end;
+ if (frag_start >= frag_end) {
+ /* figure out the next fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end,
+ flags, &frag, &frag_end);
+ if (frag < shift)
+ break;
+ }
+ } while (frag_start < entry_end);
+
+ if (amdgpu_vm_pt_descendant(adev, &cursor)) {
+ /* Free all child entries.
+ * Update the tables with the flags and addresses and free up subsequent
+ * tables in the case of huge pages or freed up areas.
+ * This is the maximum you can free, because all other page tables are not
+ * completely covered by the range and so potentially still in use.
+ */
+ while (cursor.pfn < frag_start) {
+ /* Make sure previous mapping is freed */
+ if (cursor.entry->bo) {
+ params->needs_flush = true;
+ amdgpu_vm_pt_add_list(params, &cursor);
+ }
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+
+ } else if (frag >= shift) {
+ /* or just move on to the next on the same level. */
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_pt_map_tables - make the page table BOs CPU accessible
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * Make the root page directory and everything below it CPU accessible.
+ */
+int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+
+ struct amdgpu_bo_vm *bo;
+ int r;
+
+ if (entry->bo) {
+ bo = to_amdgpu_bo_vm(entry->bo);
+ r = vm->update_funcs->map_table(bo);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
new file mode 100644
index 000000000000..36805dcfa159
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu_vm.h"
+#include "amdgpu_job.h"
+#include "amdgpu_object.h"
+#include "amdgpu_trace.h"
+
+#define AMDGPU_VM_SDMA_MIN_NUM_DW 256u
+#define AMDGPU_VM_SDMA_MAX_NUM_DW (16u * 1024u)
+
+/**
+ * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped
+ *
+ * @table: newly allocated or validated PD/PT
+ */
+static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
+{
+ return amdgpu_ttm_alloc_gart(&table->bo.tbo);
+}
+
+/* Allocate a new job for @count PTE updates */
+static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
+ unsigned int count, u64 k_job_id)
+{
+ enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+ : AMDGPU_IB_POOL_DELAYED;
+ struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate
+ : &p->vm->delayed;
+ unsigned int ndw;
+ int r;
+
+ /* estimate how many dw we need */
+ ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
+ if (p->pages_addr)
+ ndw += count * 2;
+ ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
+
+ r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM,
+ ndw * 4, pool, &p->job, k_job_id);
+ if (r)
+ return r;
+
+ p->num_dw_left = ndw;
+ return 0;
+}
+
+/**
+ * amdgpu_vm_sdma_prepare - prepare SDMA command submission
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @sync: amdgpu_sync object with fences to wait for
+ * @k_job_id: identifier of the job, for tracing purposes
+ *
+ * Returns:
+ * Negative errno on failure, 0 for success.
+ */
+static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
+ struct amdgpu_sync *sync, u64 k_job_id)
+{
+ int r;
+
+ r = amdgpu_vm_sdma_alloc_job(p, 0, k_job_id);
+ if (r)
+ return r;
+
+ if (!sync)
+ return 0;
+
+ r = amdgpu_sync_push_to_job(sync, p->job);
+ if (r) {
+ p->num_dw_left = 0;
+ amdgpu_job_free(p->job);
+ }
+ return r;
+}
+
+/**
+ * amdgpu_vm_sdma_commit - commit SDMA command submission
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @fence: resulting fence
+ *
+ * Returns:
+ * Negative errno on failure, 0 for success.
+ */
+static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
+ struct dma_fence **fence)
+{
+ struct amdgpu_ib *ib = p->job->ibs;
+ struct amdgpu_ring *ring;
+ struct dma_fence *f;
+
+ ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring,
+ sched);
+
+ WARN_ON(ib->length_dw == 0);
+ amdgpu_ring_pad_ib(ring, ib);
+
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
+ WARN_ON(ib->length_dw > p->num_dw_left);
+ f = amdgpu_job_submit(p->job);
+
+ if (p->unlocked) {
+ struct dma_fence *tmp = dma_fence_get(f);
+
+ swap(p->vm->last_unlocked, tmp);
+ dma_fence_put(tmp);
+ } else {
+ dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+
+ if (fence && !p->immediate) {
+ /*
+ * Most hw generations now have a separate queue for page table
+ * updates, but when the queue is shared with userspace we need
+ * the extra CPU round trip to correctly flush the TLB.
+ */
+ set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
+ swap(*fence, f);
+ }
+ dma_fence_put(f);
+ return 0;
+}
+
+/**
+ * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @bo: PD/PT to update
+ * @pe: addr of the page entry
+ * @count: number of page entries to copy
+ *
+ * Traces the parameters and calls the DMA function to copy the PTEs.
+ */
+static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo *bo, uint64_t pe,
+ unsigned count)
+{
+ struct amdgpu_ib *ib = p->job->ibs;
+ uint64_t src = ib->gpu_addr;
+
+ src += p->num_dw_left * 4;
+
+ pe += amdgpu_bo_gpu_offset_no_check(bo);
+ trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);
+
+ amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
+}
+
+/**
+ * amdgpu_vm_sdma_set_ptes - helper to call the right asic function
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @bo: PD/PT to update
+ * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Traces the parameters and calls the right asic functions
+ * to setup the page table using the DMA.
+ */
+static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo *bo, uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ struct amdgpu_ib *ib = p->job->ibs;
+
+ pe += amdgpu_bo_gpu_offset_no_check(bo);
+ trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
+ if (count < 3) {
+ amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
+ count, incr);
+ } else {
+ amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr,
+ count, incr, flags);
+ }
+}
+
+/**
+ * amdgpu_vm_sdma_update - execute VM update
+ *
+ * @p: see amdgpu_vm_update_params definition
+ * @vmbo: PD/PT to update
+ * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: hw access flags
+ *
+ * Reserve space in the IB, set up the mapping buffer on demand and write commands to
+ * the IB.
+ */
+static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
+ struct amdgpu_bo_vm *vmbo, uint64_t pe,
+ uint64_t addr, unsigned count, uint32_t incr,
+ uint64_t flags)
+{
+ struct amdgpu_bo *bo = &vmbo->bo;
+ struct dma_resv_iter cursor;
+ unsigned int i, ndw, nptes;
+ struct dma_fence *fence;
+ uint64_t *pte;
+ int r;
+
+ /* Wait for PD/PT moves to be completed */
+ dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ dma_fence_get(fence);
+ r = drm_sched_job_add_dependency(&p->job->base, fence);
+ if (r) {
+ dma_fence_put(fence);
+ dma_resv_iter_end(&cursor);
+ return r;
+ }
+ }
+ dma_resv_iter_end(&cursor);
+
+ do {
+ ndw = p->num_dw_left;
+ ndw -= p->job->ibs->length_dw;
+
+ if (ndw < 32) {
+ r = amdgpu_vm_sdma_commit(p, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_sdma_alloc_job(p, count,
+ AMDGPU_KERNEL_JOB_ID_VM_UPDATE);
+ if (r)
+ return r;
+ }
+
+ if (!p->pages_addr) {
+ /* set page commands needed */
+ amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
+ incr, flags);
+ return 0;
+ }
+
+ /* copy commands needed */
+ ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
+
+ /* for padding */
+ ndw -= 7;
+
+ nptes = min(count, ndw / 2);
+
+ /* Put the PTEs at the end of the IB. */
+ p->num_dw_left -= nptes * 2;
+ pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]);
+ for (i = 0; i < nptes; ++i, addr += incr) {
+ pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
+ pte[i] |= flags;
+ }
+
+ amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
+
+ pe += nptes * 8;
+ count -= nptes;
+ } while (count);
+
+ return 0;
+}
+
+const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
+ .map_table = amdgpu_vm_sdma_map_table,
+ .prepare = amdgpu_vm_sdma_prepare,
+ .update = amdgpu_vm_sdma_update,
+ .commit = amdgpu_vm_sdma_commit
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index 000000000000..5d26797356a3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gmc.h"
+
+struct amdgpu_tlb_fence {
+ struct dma_fence base;
+ struct amdgpu_device *adev;
+ struct dma_fence *dependency;
+ struct work_struct work;
+ spinlock_t lock;
+ uint16_t pasid;
+
+};
+
+static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu tlb fence";
+}
+
+static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f)
+{
+ return "amdgpu tlb timeline";
+}
+
+static void amdgpu_tlb_fence_work(struct work_struct *work)
+{
+ struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
+ int r;
+
+ if (f->dependency) {
+ dma_fence_wait(f->dependency, false);
+ dma_fence_put(f->dependency);
+ f->dependency = NULL;
+ }
+
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
+ if (r) {
+ dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
+ f->pasid);
+ dma_fence_set_error(&f->base, r);
+ }
+
+ dma_fence_signal(&f->base);
+ dma_fence_put(&f->base);
+}
+
+static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
+ .get_driver_name = amdgpu_tlb_fence_get_driver_name,
+ .get_timeline_name = amdgpu_tlb_fence_get_timeline_name
+};
+
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct dma_fence **fence)
+{
+ struct amdgpu_tlb_fence *f;
+
+ f = kmalloc(sizeof(*f), GFP_KERNEL);
+ if (!f) {
+ /*
+ * We can't fail since the PDEs and PTEs are already updated, so
+ * just block for the dependency and execute the TLB flush
+ */
+ if (*fence)
+ dma_fence_wait(*fence, false);
+
+ amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
+ *fence = dma_fence_get_stub();
+ return;
+ }
+
+ f->adev = adev;
+ f->dependency = *fence;
+ f->pasid = vm->pasid;
+ INIT_WORK(&f->work, amdgpu_tlb_fence_work);
+ spin_lock_init(&f->lock);
+
+ dma_fence_init64(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
+ vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
+
+ /* TODO: We probably need a separate wq here */
+ dma_fence_get(&f->base);
+ schedule_work(&f->work);
+
+ *fence = &f->base;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
new file mode 100644
index 000000000000..aa78c2ee9e21
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -0,0 +1,1018 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_vpe.h"
+#include "amdgpu_smu.h"
+#include "soc15_common.h"
+#include "vpe_v6_1.h"
+
+#define AMDGPU_CSA_VPE_SIZE 64
+/* VPE CSA resides in the 4th page of CSA */
+#define AMDGPU_CSA_VPE_OFFSET (4096 * 3)
+
+/* 1 second timeout */
+#define VPE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
+#define VPE_MAX_DPM_LEVEL 4
+#define FIXED1_8_BITS_PER_FRACTIONAL_PART 8
+#define GET_PRATIO_INTEGER_PART(x) ((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART)
+
+static void vpe_set_ring_funcs(struct amdgpu_device *adev);
+
+static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder)
+{
+ *remainder = dividend % divisor;
+ return dividend / divisor;
+}
+
+static inline uint16_t complete_integer_division_u16(
+ uint16_t dividend,
+ uint16_t divisor,
+ uint16_t *remainder)
+{
+ return div16_u16_rem(dividend, divisor, (uint16_t *)remainder);
+}
+
+static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator)
+{
+ u16 arg1_value = numerator;
+ u16 arg2_value = denominator;
+
+ uint16_t remainder;
+
+ /* determine integer part */
+ uint16_t res_value = complete_integer_division_u16(
+ arg1_value, arg2_value, &remainder);
+
+ if (res_value > 127 /* CHAR_MAX */)
+ return 0;
+
+ /* determine fractional part */
+ {
+ unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART;
+
+ do {
+ remainder <<= 1;
+
+ res_value <<= 1;
+
+ if (remainder >= arg2_value) {
+ res_value |= 1;
+ remainder -= arg2_value;
+ }
+ } while (--i != 0);
+ }
+
+ /* round up LSB */
+ {
+ uint16_t summand = (remainder << 1) >= arg2_value;
+
+ if ((res_value + summand) > 32767 /* SHRT_MAX */)
+ return 0;
+
+ res_value += summand;
+ }
+
+ return res_value;
+}
+
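+/*
+ * Ratio between two frequencies in U1.8 fixed point, e.g.
+ * vpe_internal_get_pratio(800, 1000) = 0xcd (205/256, roughly 0.8).
+ * Ratios with an integer part larger than 1 are rejected and return 0.
+ */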
+static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency)
+{
+ uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency);
+
+ if (GET_PRATIO_INTEGER_PART(pratio) > 1)
+ pratio = 0;
+
+ return pratio;
+}
+
+/*
+ * VPE has 4 DPM levels, from level 0 (lowest) to 3 (highest).
+ * The VPE FW dynamically decides which level to use according to the current load.
+ *
+ * Get the VPE and SOC clocks from PM, select the appropriate four clock values
+ * and calculate the ratios for adjusting from one clock to another.
+ * The VPE FW can then request the appropriate frequency from the PMFW.
+ */
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ uint32_t dpm_ctl;
+
+ if (adev->pm.dpm_enabled) {
+ struct dpm_clocks clock_table = { 0 };
+ struct dpm_clock *VPEClks;
+ struct dpm_clock *SOCClks;
+ uint32_t idx;
+ uint32_t vpeclk_enabled_num = 0;
+ uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
+ uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;
+
+ dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+ dpm_ctl |= 1; /* DPM enablement */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+
+ /* Get VPECLK and SOCCLK */
+ if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) {
+ dev_dbg(adev->dev, "%s: get clock failed!\n", __func__);
+ goto disable_dpm;
+ }
+
+ SOCClks = clock_table.SocClocks;
+ VPEClks = clock_table.VPEClocks;
+
+ /* Confirm the number of enabled VPE clocks.
+ * Enabled VPE clocks are ordered from low to high in VPEClks;
+ * the highest valid clock index + 1 is the number of enabled VPE clocks.
+ */
+ for (idx = PP_SMU_NUM_VPECLK_DPM_LEVELS; idx && !vpeclk_enabled_num; idx--)
+ if (VPEClks[idx-1].Freq)
+ vpeclk_enabled_num = idx;
+
+ /* vpe dpm only cares 4 levels. */
+ for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
+ uint32_t soc_dpm_level;
+ uint32_t min_freq;
+
+ if (idx == 0)
+ soc_dpm_level = 0;
+ else
+ soc_dpm_level = (idx * 2) + 1;
+
+ /* clamp the max level */
+ if (soc_dpm_level > vpeclk_enabled_num - 1)
+ soc_dpm_level = vpeclk_enabled_num - 1;
+
+ min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
+ SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;
+
+ switch (idx) {
+ case 0:
+ pratio_vmin_freq = min_freq;
+ break;
+ case 1:
+ pratio_vmid_freq = min_freq;
+ break;
+ case 2:
+ pratio_vnorm_freq = min_freq;
+ break;
+ case 3:
+ pratio_vmax_freq = min_freq;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) {
+ uint32_t pratio_ctl;
+
+ pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq);
+ pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq);
+ pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq);
+
+ pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18);
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl); /* PRatio */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000); /* 1ms, unit=1/24MHz */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000); /* 50ms */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */
+ dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__);
+ } else {
+ dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__);
+ goto disable_dpm;
+ }
+ }
+ return 0;
+
+disable_dpm:
+ dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable));
+ dpm_ctl &= 0xfffffffe; /* Disable DPM */
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
+ dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
+ return -EINVAL;
+}
+
+int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
+{
+ struct amdgpu_firmware_info ucode = {
+ .ucode_id = AMDGPU_UCODE_ID_VPE,
+ .mc_addr = adev->vpe.cmdbuf_gpu_addr,
+ .ucode_size = 8,
+ };
+
+ return psp_execute_ip_fw_load(&adev->psp, &ucode);
+}
+
+int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = vpe->ring.adev;
+ const struct vpe_firmware_header_v1_0 *vpe_hdr;
+ char fw_prefix[32];
+ int ret;
+
+ amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
+ ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", fw_prefix);
+ if (ret)
+ goto out;
+
+ vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
+ adev->vpe.fw_version = le32_to_cpu(vpe_hdr->header.ucode_version);
+ adev->vpe.feature_version = le32_to_cpu(vpe_hdr->ucode_feature_version);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ struct amdgpu_firmware_info *info;
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTX];
+ info->ucode_id = AMDGPU_UCODE_ID_VPE_CTX;
+ info->fw = adev->vpe.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTL];
+ info->ucode_id = AMDGPU_UCODE_ID_VPE_CTL;
+ info->fw = adev->vpe.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
+ }
+
+ return 0;
+out:
+ dev_err(adev->dev, "fail to initialize vpe microcode\n");
+ release_firmware(adev->vpe.fw);
+ adev->vpe.fw = NULL;
+ return ret;
+}
+
+int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+ struct amdgpu_ring *ring = &vpe->ring;
+ int ret;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(0);
+ ring->doorbell_index = (adev->doorbell_index.vpe_ring << 1);
+ snprintf(ring->name, 4, "vpe");
+
+ ret = amdgpu_ring_init(adev, ring, 1024, &vpe->trap_irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe)
+{
+ amdgpu_ring_fini(&vpe->ring);
+
+ return 0;
+}
+
+static int vpe_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 0):
+ case IP_VERSION(6, 1, 3):
+ vpe_v6_1_set_funcs(vpe);
+ break;
+ case IP_VERSION(6, 1, 1):
+ vpe_v6_1_set_funcs(vpe);
+ vpe->collaborate_mode = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ vpe_set_ring_funcs(adev);
+ vpe_set_regs(vpe);
+
+ dev_info(adev->dev, "VPE: collaborate mode %s", vpe->collaborate_mode ? "true" : "false");
+
+ return 0;
+}
+
+static bool vpe_need_dpm0_at_power_down(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
+ case IP_VERSION(6, 1, 1):
+ return adev->pm.fw_version < 0x0a640500;
+ default:
+ return false;
+ }
+}
+
+static int vpe_get_dpm_level(struct amdgpu_device *adev)
+{
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ if (!adev->pm.dpm_enabled)
+ return 0;
+
+ return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv));
+}
+
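+/*
+ * Gate VPE once no fences are outstanding and, on firmware that requires it,
+ * once DPM has dropped back to level 0; otherwise check again after another
+ * VPE_IDLE_TIMEOUT.
+ */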
+static void vpe_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, vpe.idle_work.work);
+ unsigned int fences = 0;
+
+ fences += amdgpu_fence_count_emitted(&adev->vpe.ring);
+ if (fences)
+ goto reschedule;
+
+ if (vpe_need_dpm0_at_power_down(adev) && vpe_get_dpm_level(adev) != 0)
+ goto reschedule;
+
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+ return;
+
+reschedule:
+ schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
+
+static int vpe_common_init(struct amdgpu_vpe *vpe)
+{
+ struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
+ int r;
+
+ r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vpe.cmdbuf_obj,
+ &adev->vpe.cmdbuf_gpu_addr,
+ (void **)&adev->vpe.cmdbuf_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "VPE: failed to allocate cmdbuf bo %d\n", r);
+ return r;
+ }
+
+ vpe->context_started = false;
+ INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler);
+
+ return 0;
+}
+
+static int vpe_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ int ret;
+
+ ret = vpe_common_init(vpe);
+ if (ret)
+ goto out;
+
+ ret = vpe_irq_init(vpe);
+ if (ret)
+ goto out;
+
+ ret = vpe_ring_init(vpe);
+ if (ret)
+ goto out;
+
+ ret = vpe_init_microcode(vpe);
+ if (ret)
+ goto out;
+
+ adev->vpe.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vpe.ring);
+ if (!amdgpu_sriov_vf(adev))
+ adev->vpe.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ ret = amdgpu_vpe_sysfs_reset_mask_init(adev);
+ if (ret)
+ goto out;
+out:
+ return ret;
+}
+
+static int vpe_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ release_firmware(vpe->fw);
+ vpe->fw = NULL;
+
+ amdgpu_vpe_sysfs_reset_mask_fini(adev);
+ vpe_ring_fini(vpe);
+
+ amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj,
+ &adev->vpe.cmdbuf_gpu_addr,
+ (void **)&adev->vpe.cmdbuf_cpu_addr);
+
+ return 0;
+}
+
+static int vpe_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ int ret;
+
+ /* Power on VPE */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_UNGATE);
+ if (ret)
+ return ret;
+
+ ret = vpe_load_microcode(vpe);
+ if (ret)
+ return ret;
+
+ ret = vpe_ring_start(vpe);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int vpe_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+ vpe_ring_stop(vpe);
+
+ /* Power off VPE */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE);
+
+ return 0;
+}
+
+static int vpe_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return vpe_hw_fini(ip_block);
+}
+
+static int vpe_resume(struct amdgpu_ip_block *ip_block)
+{
+ return vpe_hw_init(ip_block);
+}
+
+static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if (i == 0)
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ VPE_CMD_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ }
+}
+
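+/*
+ * Return the GPU address of the per-ring VPE context save area used for
+ * preemption, or 0 for SR-IOV VFs, for VMID 0 or when MCBP is disabled.
+ */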
+static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t index = 0;
+ uint64_t csa_mc_addr;
+
+ if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
+ return 0;
+
+ csa_mc_addr = amdgpu_csa_vaddr(adev) + AMDGPU_CSA_VPE_OFFSET +
+ index * AMDGPU_CSA_VPE_SIZE;
+
+ return csa_mc_addr;
+}
+
+static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring,
+ uint32_t device_select,
+ uint32_t exec_count)
+{
+ if (!ring->adev->vpe.collaborate_mode)
+ return;
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) |
+ (device_select << 16));
+ amdgpu_ring_write(ring, exec_count & 0x1fff);
+}
+
+static void vpe_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = vpe_get_csa_mc_addr(ring, vmid);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0) |
+ VPE_CMD_INDIRECT_HEADER_VMID(vmid & 0xf));
+
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
+ uint64_t seq, unsigned int flags)
+{
+ int i = 0;
+
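+ /*
+ * Emit one fence packet for the lower 32 bits of the sequence and,
+ * when AMDGPU_FENCE_FLAG_64BIT is set, a second packet for the
+ * upper 32 bits at addr + 4.
+ */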
+ do {
+ /* write the fence */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
+ /* zero in first two bits */
+ WARN_ON_ONCE(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, i == 0 ? lower_32_bits(seq) : upper_32_bits(seq));
+ addr += 4;
+ } while ((flags & AMDGPU_FENCE_FLAG_64BIT) && (i++ < 1));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_TRAP, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ vpe_ring_emit_pred_exec(ring, 0, 6);
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
+ VPE_POLL_REGMEM_SUBOP_REGMEM) |
+ VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ VPE_CMD_POLL_REGMEM_HEADER_MEM(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ VPE_CMD_POLL_REGMEM_DW5_INTERVAL(4));
+}
+
+static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+ vpe_ring_emit_pred_exec(ring, 0, 3);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ vpe_ring_emit_pred_exec(ring, 0, 6);
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
+ VPE_POLL_REGMEM_SUBOP_REGMEM) |
+ VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ VPE_CMD_POLL_REGMEM_HEADER_MEM(0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ VPE_CMD_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
+ uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned int ret;
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 1);
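+ /* remember the offset of the count dword so the caller can patch it later */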
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ uint32_t preempt_reg = vpe->regs.queue0_preempt;
+ int i, r = 0;
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 10);
+ vpe_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ dev_err(adev->dev, "ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+
+ return r;
+}
+
+static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ if (!adev->pm.dpm_enabled)
+ dev_err(adev->dev, "Without PM, cannot support powergating\n");
+
+ dev_dbg(adev->dev, "%s: %s!\n", __func__, (state == AMD_PG_STATE_GATE) ? "GATE":"UNGATE");
+
+ if (state == AMD_PG_STATE_GATE) {
+ amdgpu_dpm_enable_vpe(adev, false);
+ vpe->context_started = false;
+ } else {
+ amdgpu_dpm_enable_vpe(adev, true);
+ }
+
+ return 0;
+}
+
+static uint64_t vpe_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ uint64_t rptr;
+
+ if (ring->use_doorbell) {
+ rptr = atomic64_read((atomic64_t *)ring->rptr_cpu_addr);
+ dev_dbg(adev->dev, "rptr/doorbell before shift == 0x%016llx\n", rptr);
+ } else {
+ rptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_hi));
+ rptr = rptr << 32;
+ rptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_lo));
+ dev_dbg(adev->dev, "rptr before shift [%i] == 0x%016llx\n", ring->me, rptr);
+ }
+
+ return (rptr >> 2);
+}
+
+static uint64_t vpe_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+ uint64_t wptr;
+
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ dev_dbg(adev->dev, "wptr/doorbell before shift == 0x%016llx\n", wptr);
+ } else {
+ wptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi));
+ wptr = wptr << 32;
+ wptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo));
+ dev_dbg(adev->dev, "wptr before shift [%i] == 0x%016llx\n", ring->me, wptr);
+ }
+
+ return (wptr >> 2);
+}
+
+static void vpe_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ if (ring->use_doorbell) {
+ dev_dbg(adev->dev, "Using doorbell, \
+ wptr_offs == 0x%08x, \
+ lower_32_bits(ring->wptr) << 2 == 0x%08x, \
+ upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ if (vpe->collaborate_mode)
+ WDOORBELL64(ring->doorbell_index + 4, ring->wptr << 2);
+ } else {
+ int i;
+
+ for (i = 0; i < vpe->num_instances; i++) {
+ dev_dbg(adev->dev, "Not using doorbell, \
+ regVPEC_QUEUE0_RB_WPTR == 0x%08x, \
+ regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n",
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi),
+ upper_32_bits(ring->wptr << 2));
+ }
+ }
+}
+
+static int vpe_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ const uint32_t test_pattern = 0xdeadbeef;
+ uint32_t index, i;
+ uint64_t wb_addr;
+ int ret;
+
+ ret = amdgpu_device_wb_get(adev, &index);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
+ return ret;
+ }
+
+ adev->wb.wb[index] = 0;
+ wb_addr = adev->wb.gpu_addr + (index * 4);
+
+ ret = amdgpu_ring_alloc(ring, 4);
+ if (ret) {
+ dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, ret);
+ goto out;
+ }
+
+ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
+ amdgpu_ring_write(ring, lower_32_bits(wb_addr));
+ amdgpu_ring_write(ring, upper_32_bits(wb_addr));
+ amdgpu_ring_write(ring, test_pattern);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (le32_to_cpu(adev->wb.wb[index]) == test_pattern)
+ goto out;
+ udelay(1);
+ }
+
+ ret = -ETIMEDOUT;
+out:
+ amdgpu_device_wb_free(adev, index);
+
+ return ret;
+}
+
+static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ const uint32_t test_pattern = 0xdeadbeef;
+ struct amdgpu_ib ib = {};
+ struct dma_fence *f = NULL;
+ uint32_t index;
+ uint64_t wb_addr;
+ int ret;
+
+ ret = amdgpu_device_wb_get(adev, &index);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
+ return ret;
+ }
+
+ adev->wb.wb[index] = 0;
+ wb_addr = adev->wb.gpu_addr + (index * 4);
+
+ ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (ret)
+ goto err0;
+
+ ib.ptr[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
+ ib.ptr[1] = lower_32_bits(wb_addr);
+ ib.ptr[2] = upper_32_bits(wb_addr);
+ ib.ptr[3] = test_pattern;
+ ib.ptr[4] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+ ib.ptr[5] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+ ib.ptr[6] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+ ib.ptr[7] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
+ ib.length_dw = 8;
+
+ ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (ret)
+ goto err1;
+
+ ret = dma_fence_wait_timeout(f, false, timeout);
+ if (ret <= 0) {
+ ret = ret ? : -ETIMEDOUT;
+ goto err1;
+ }
+
+ ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;
+
+err1:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+
+ return ret;
+}
+
+static void vpe_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vpe *vpe = &adev->vpe;
+
+ cancel_delayed_work_sync(&adev->vpe.idle_work);
+
+ /* Power on VPE and notify VPE of new context */
+ if (!vpe->context_started) {
+ uint32_t context_notify;
+
+ /* Power on VPE */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE);
+
+ /* Indicates that a job from a new context has been submitted. */
+ context_notify = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator));
+ if ((context_notify & 0x1) == 0)
+ context_notify |= 0x1;
+ else
+ context_notify &= ~(0x1);
+ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), context_notify);
+ vpe->context_started = true;
+ }
+}
+
+static void vpe_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
+}
+
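+/*
+ * Queue reset: power-cycle the VPE block through its powergating state and
+ * let the common ring reset helpers handle the fence bookkeeping.
+ */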
+static int vpe_ring_reset(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_GATE);
+ if (r)
+ return r;
+ r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_UNGATE);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static ssize_t amdgpu_get_vpe_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->vpe.supported_reset);
+}
+
+static DEVICE_ATTR(vpe_reset_mask, 0444,
+ amdgpu_get_vpe_reset_mask, NULL);
+
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->vpe.num_instances) {
+ r = device_create_file(adev->dev, &dev_attr_vpe_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->vpe.num_instances)
+ device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
+ }
+}
+
+static const struct amdgpu_ring_funcs vpe_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_VPE,
+ .align_mask = 0xf,
+ .nop = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0),
+ .support_64bit_ptrs = true,
+ .get_rptr = vpe_ring_get_rptr,
+ .get_wptr = vpe_ring_get_wptr,
+ .set_wptr = vpe_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* vpe_ring_init_cond_exec */
+ 6 + /* vpe_ring_emit_pipeline_sync */
+ 10 + 10 + 10 + /* vpe_ring_emit_fence */
+ /* vpe_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6,
+ .emit_ib_size = 7 + 6,
+ .emit_ib = vpe_ring_emit_ib,
+ .emit_pipeline_sync = vpe_ring_emit_pipeline_sync,
+ .emit_fence = vpe_ring_emit_fence,
+ .emit_vm_flush = vpe_ring_emit_vm_flush,
+ .emit_wreg = vpe_ring_emit_wreg,
+ .emit_reg_wait = vpe_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .insert_nop = vpe_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .test_ring = vpe_ring_test_ring,
+ .test_ib = vpe_ring_test_ib,
+ .init_cond_exec = vpe_ring_init_cond_exec,
+ .preempt_ib = vpe_ring_preempt_ib,
+ .begin_use = vpe_ring_begin_use,
+ .end_use = vpe_ring_end_use,
+ .reset = vpe_ring_reset,
+};
+
+static void vpe_set_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vpe.ring.funcs = &vpe_ring_funcs;
+}
+
+const struct amd_ip_funcs vpe_ip_funcs = {
+ .name = "vpe_v6_1",
+ .early_init = vpe_early_init,
+ .sw_init = vpe_sw_init,
+ .sw_fini = vpe_sw_fini,
+ .hw_init = vpe_hw_init,
+ .hw_fini = vpe_hw_fini,
+ .suspend = vpe_suspend,
+ .resume = vpe_resume,
+ .set_clockgating_state = vpe_set_clockgating_state,
+ .set_powergating_state = vpe_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vpe_v6_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VPE,
+ .major = 6,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &vpe_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
new file mode 100644
index 000000000000..695da740a97e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __AMDGPU_VPE_H__
+#define __AMDGPU_VPE_H__
+
+#include "amdgpu_ring.h"
+#include "amdgpu_irq.h"
+#include "vpe_6_1_fw_if.h"
+
+#define AMDGPU_MAX_VPE_INSTANCES 2
+
+struct amdgpu_vpe;
+
+struct vpe_funcs {
+ uint32_t (*get_reg_offset)(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset);
+ int (*set_regs)(struct amdgpu_vpe *vpe);
+ int (*irq_init)(struct amdgpu_vpe *vpe);
+ int (*init_microcode)(struct amdgpu_vpe *vpe);
+ int (*load_microcode)(struct amdgpu_vpe *vpe);
+ int (*ring_init)(struct amdgpu_vpe *vpe);
+ int (*ring_start)(struct amdgpu_vpe *vpe);
+ int (*ring_stop)(struct amdgpu_vpe *vpe);
+ int (*ring_fini)(struct amdgpu_vpe *vpe);
+};
+
+struct vpe_regs {
+ uint32_t queue0_rb_rptr_lo;
+ uint32_t queue0_rb_rptr_hi;
+ uint32_t queue0_rb_wptr_lo;
+ uint32_t queue0_rb_wptr_hi;
+ uint32_t queue0_preempt;
+
+ uint32_t dpm_enable;
+ uint32_t dpm_pratio;
+ uint32_t dpm_request_interval;
+ uint32_t dpm_decision_threshold;
+ uint32_t dpm_busy_clamp_threshold;
+ uint32_t dpm_idle_clamp_threshold;
+ uint32_t dpm_request_lv;
+ uint32_t context_indicator;
+};
+
+struct amdgpu_vpe {
+ struct amdgpu_ring ring;
+ struct amdgpu_irq_src trap_irq;
+
+ const struct vpe_funcs *funcs;
+ struct vpe_regs regs;
+
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_bo *cmdbuf_obj;
+ uint64_t cmdbuf_gpu_addr;
+ uint32_t *cmdbuf_cpu_addr;
+ struct delayed_work idle_work;
+ bool context_started;
+
+ uint32_t num_instances;
+ bool collaborate_mode;
+ uint32_t supported_reset;
+};
+
+int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);
+int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe);
+int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe);
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev);
+
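+/*
+ * Convenience wrappers around the HW-generation specific callbacks in
+ * struct vpe_funcs; optional hooks that are not implemented fall back to 0.
+ */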
+#define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0)
+#define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0)
+#define vpe_ring_stop(vpe) ((vpe)->funcs->ring_stop ? (vpe)->funcs->ring_stop((vpe)) : 0)
+#define vpe_ring_fini(vpe) ((vpe)->funcs->ring_fini ? (vpe)->funcs->ring_fini((vpe)) : 0)
+
+#define vpe_get_reg_offset(vpe, inst, offset) \
+ ((vpe)->funcs->get_reg_offset ? (vpe)->funcs->get_reg_offset((vpe), (inst), (offset)) : 0)
+#define vpe_set_regs(vpe) \
+ ((vpe)->funcs->set_regs ? (vpe)->funcs->set_regs((vpe)) : 0)
+#define vpe_irq_init(vpe) \
+ ((vpe)->funcs->irq_init ? (vpe)->funcs->irq_init((vpe)) : 0)
+#define vpe_init_microcode(vpe) \
+ ((vpe)->funcs->init_microcode ? (vpe)->funcs->init_microcode((vpe)) : 0)
+#define vpe_load_microcode(vpe) \
+ ((vpe)->funcs->load_microcode ? (vpe)->funcs->load_microcode((vpe)) : 0)
+
+extern const struct amdgpu_ip_block_version vpe_v6_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
new file mode 100644
index 000000000000..9d934c07fa6b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -0,0 +1,977 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#include <linux/dma-mapping.h>
+#include <drm/ttm/ttm_range_manager.h>
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_res_cursor.h"
+#include "atom.h"
+
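+/* Cap for a single scatterlist segment when exporting VRAM (2 GiB) */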
+#define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30)
+
+struct amdgpu_vram_reservation {
+ u64 start;
+ u64 size;
+ struct list_head allocated;
+ struct list_head blocks;
+};
+
+static inline struct amdgpu_vram_mgr *
+to_vram_mgr(struct ttm_resource_manager *man)
+{
+ return container_of(man, struct amdgpu_vram_mgr, manager);
+}
+
+static inline struct amdgpu_device *
+to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
+{
+ return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
+}
+
+static inline struct drm_buddy_block *
+amdgpu_vram_mgr_first_block(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 start, size;
+
+ block = amdgpu_vram_mgr_first_block(head);
+ if (!block)
+ return false;
+
+ while (head != block->link.next) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+
+ block = list_entry(block->link.next, struct drm_buddy_block, link);
+ if (start + size != amdgpu_vram_mgr_block_start(block))
+ return false;
+ }
+
+ return true;
+}
+
+static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 size = 0;
+
+ list_for_each_entry(block, head, link)
+ size += amdgpu_vram_mgr_block_size(block);
+
+ return size;
+}
+
+/**
+ * DOC: mem_info_vram_total
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total VRAM
+ * available on the device
+ * The file mem_info_vram_total is used for this and returns the total
+ * amount of VRAM in bytes
+ */
+static ssize_t amdgpu_mem_info_vram_total_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%llu\n", adev->gmc.real_vram_size);
+}
+
+/**
+ * DOC: mem_info_vis_vram_total
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total
+ * visible VRAM available on the device
+ * The file mem_info_vis_vram_total is used for this and returns the total
+ * amount of visible VRAM in bytes
+ */
+static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%llu\n", adev->gmc.visible_vram_size);
+}
+
+/**
+ * DOC: mem_info_vram_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting the amount of VRAM
+ * currently used on the device.
+ * The file mem_info_vram_used is used for this and returns the total
+ * amount of currently used VRAM in bytes.
+ */
+static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man = &adev->mman.vram_mgr.manager;
+
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
+}
+
+/**
+ * DOC: mem_info_vis_vram_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total of
+ * used visible VRAM
+ * The file mem_info_vis_vram_used is used for this and returns the total
+ * amount of currently used visible VRAM in bytes
+ */
+static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%llu\n",
+ amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr));
+}
+
+/**
+ * DOC: mem_info_vram_vendor
+ *
+ * The amdgpu driver provides a sysfs API for reporting the vendor of the
+ * installed VRAM
+ * The file mem_info_vram_vendor is used for this and returns the name of the
+ * vendor.
+ */
+static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ switch (adev->gmc.vram_vendor) {
+ case SAMSUNG:
+ return sysfs_emit(buf, "samsung\n");
+ case INFINEON:
+ return sysfs_emit(buf, "infineon\n");
+ case ELPIDA:
+ return sysfs_emit(buf, "elpida\n");
+ case ETRON:
+ return sysfs_emit(buf, "etron\n");
+ case NANYA:
+ return sysfs_emit(buf, "nanya\n");
+ case HYNIX:
+ return sysfs_emit(buf, "hynix\n");
+ case MOSEL:
+ return sysfs_emit(buf, "mosel\n");
+ case WINBOND:
+ return sysfs_emit(buf, "winbond\n");
+ case ESMT:
+ return sysfs_emit(buf, "esmt\n");
+ case MICRON:
+ return sysfs_emit(buf, "micron\n");
+ default:
+ return sysfs_emit(buf, "unknown\n");
+ }
+}
+
+static DEVICE_ATTR(mem_info_vram_total, S_IRUGO,
+ amdgpu_mem_info_vram_total_show, NULL);
+static DEVICE_ATTR(mem_info_vis_vram_total, S_IRUGO,
+ amdgpu_mem_info_vis_vram_total_show, NULL);
+static DEVICE_ATTR(mem_info_vram_used, S_IRUGO,
+ amdgpu_mem_info_vram_used_show, NULL);
+static DEVICE_ATTR(mem_info_vis_vram_used, S_IRUGO,
+ amdgpu_mem_info_vis_vram_used_show, NULL);
+static DEVICE_ATTR(mem_info_vram_vendor, S_IRUGO,
+ amdgpu_mem_info_vram_vendor, NULL);
+
+static struct attribute *amdgpu_vram_mgr_attributes[] = {
+ &dev_attr_mem_info_vram_total.attr,
+ &dev_attr_mem_info_vis_vram_total.attr,
+ &dev_attr_mem_info_vram_used.attr,
+ &dev_attr_mem_info_vis_vram_used.attr,
+ &dev_attr_mem_info_vram_vendor.attr,
+ NULL
+};
+
+static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (attr == &dev_attr_mem_info_vram_vendor.attr &&
+ !adev->gmc.vram_vendor)
+ return 0;
+
+ if (!ttm_resource_manager_used(&adev->mman.vram_mgr.manager))
+ return 0;
+
+ return attr->mode;
+}
+
+const struct attribute_group amdgpu_vram_mgr_attr_group = {
+ .attrs = amdgpu_vram_mgr_attributes,
+ .is_visible = amdgpu_vram_attrs_is_visible
+};
+
+/**
+ * amdgpu_vram_mgr_vis_size - Calculate visible block size
+ *
+ * @adev: amdgpu_device pointer
+ * @block: DRM BUDDY block structure
+ *
+ * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
+ */
+static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
+ struct drm_buddy_block *block)
+{
+ u64 start = amdgpu_vram_mgr_block_start(block);
+ u64 end = start + amdgpu_vram_mgr_block_size(block);
+
+ if (start >= adev->gmc.visible_vram_size)
+ return 0;
+
+ return (end > adev->gmc.visible_vram_size ?
+ adev->gmc.visible_vram_size : end) - start;
+}
+
+/**
+ * amdgpu_vram_mgr_bo_visible_size - CPU visible BO size
+ *
+ * @bo: &amdgpu_bo buffer object (must be in VRAM)
+ *
+ * Returns:
+ * How much of the given &amdgpu_bo buffer object lies in CPU visible VRAM.
+ */
+u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_resource *res = bo->tbo.resource;
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+ u64 usage = 0;
+
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc))
+ return amdgpu_bo_size(bo);
+
+ if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
+ return 0;
+
+ list_for_each_entry(block, &vres->blocks, link)
+ usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+ return usage;
+}
+
+/* Commit the reservation of VRAM pages */
+static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
+{
+ struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct drm_buddy *mm = &mgr->mm;
+ struct amdgpu_vram_reservation *rsv, *temp;
+ struct drm_buddy_block *block;
+ uint64_t vis_usage;
+
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
+ if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+ rsv->size, mm->chunk_size, &rsv->allocated,
+ DRM_BUDDY_RANGE_ALLOCATION))
+ continue;
+
+ block = amdgpu_vram_mgr_first_block(&rsv->allocated);
+ if (!block)
+ continue;
+
+ dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
+ rsv->start, rsv->size);
+
+ vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
+ atomic64_add(vis_usage, &mgr->vis_usage);
+ spin_lock(&man->bdev->lru_lock);
+ man->usage += rsv->size;
+ spin_unlock(&man->bdev->lru_lock);
+ list_move(&rsv->blocks, &mgr->reserved_pages);
+ }
+}
+
+/**
+ * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ * @start: start address of the range in VRAM
+ * @size: size of the range
+ *
+ * Reserve memory from start address with the specified size in VRAM
+ */
+int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
+ uint64_t start, uint64_t size)
+{
+ struct amdgpu_vram_reservation *rsv;
+
+ rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
+ if (!rsv)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&rsv->allocated);
+ INIT_LIST_HEAD(&rsv->blocks);
+
+ rsv->start = start;
+ rsv->size = size;
+
+ mutex_lock(&mgr->lock);
+ list_add_tail(&rsv->blocks, &mgr->reservations_pending);
+ amdgpu_vram_mgr_do_reserve(&mgr->manager);
+ mutex_unlock(&mgr->lock);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_query_page_status - query the reservation status
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ * @start: start address of a page in VRAM
+ *
+ * Returns:
+ * -EBUSY: the page is still held and in the pending list
+ * 0: the page has been reserved
+ * -ENOENT: the input page is not a reservation
+ */
+int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
+ uint64_t start)
+{
+ struct amdgpu_vram_reservation *rsv;
+ int ret;
+
+ mutex_lock(&mgr->lock);
+
+ list_for_each_entry(rsv, &mgr->reservations_pending, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ret = -ENOENT;
+out:
+ mutex_unlock(&mgr->lock);
+ return ret;
+}
+
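+/**
+ * amdgpu_vram_mgr_query_address_block_info - find the block covering an address
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ * @address: byte address inside VRAM
+ * @info: returned start, size and owning task of the containing block
+ *
+ * Returns:
+ * 0 if an allocated block covers @address, -ENOENT otherwise.
+ */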
+int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
+ uint64_t address, struct amdgpu_vram_block_info *info)
+{
+ struct amdgpu_vram_mgr_resource *vres;
+ struct drm_buddy_block *block;
+ u64 start, size;
+ int ret = -ENOENT;
+
+ mutex_lock(&mgr->lock);
+ list_for_each_entry(vres, &mgr->allocated_vres_list, vres_node) {
+ list_for_each_entry(block, &vres->blocks, link) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+ if ((start <= address) && (address < (start + size))) {
+ info->start = start;
+ info->size = size;
+ memcpy(&info->task, &vres->task, sizeof(vres->task));
+ ret = 0;
+ goto out;
+ }
+ }
+ }
+
+out:
+ mutex_unlock(&mgr->lock);
+
+ return ret;
+}
+
+/**
+ * amdgpu_vram_mgr_new - allocate new ranges
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @res: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
+ struct ttm_buffer_object *tbo,
+ const struct ttm_place *place,
+ struct ttm_resource **res)
+{
+ struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ u64 vis_usage = 0, max_bytes, min_block_size;
+ struct amdgpu_vram_mgr_resource *vres;
+ u64 size, remaining_size, lpfn, fpfn;
+ unsigned int adjust_dcc_size = 0;
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ unsigned long pages_per_block;
+ int r;
+
+ lpfn = (u64)place->lpfn << PAGE_SHIFT;
+ if (!lpfn || lpfn > man->size)
+ lpfn = man->size;
+
+ fpfn = (u64)place->fpfn << PAGE_SHIFT;
+
+ max_bytes = adev->gmc.mc_vram_size;
+ if (tbo->type != ttm_bo_type_kernel)
+ max_bytes -= AMDGPU_VM_RESERVED_VRAM;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
+ pages_per_block = ~0ul;
+ } else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pages_per_block = HPAGE_PMD_NR;
+#else
+ /* default to 2MB */
+ pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+#endif
+ pages_per_block = max_t(u32, pages_per_block,
+ tbo->page_alignment);
+ }
+
+ vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+ if (!vres)
+ return -ENOMEM;
+
+ ttm_resource_init(tbo, place, &vres->base);
+
+ /* bail out quickly if there's likely not enough VRAM for this BO */
+ if (ttm_resource_manager_usage(man) > max_bytes) {
+ r = -ENOSPC;
+ goto error_fini;
+ }
+
+ INIT_LIST_HEAD(&vres->blocks);
+
+ if (place->flags & TTM_PL_FLAG_TOPDOWN)
+ vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
+ if (fpfn || lpfn != mgr->mm.size)
+ /* Allocate blocks in desired range */
+ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC &&
+ adev->gmc.gmc_funcs->get_dcc_alignment)
+ adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev);
+
+ remaining_size = (u64)vres->base.size;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ unsigned int dcc_size;
+
+ dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size);
+ remaining_size = (u64)dcc_size;
+
+ vres->flags |= DRM_BUDDY_TRIM_DISABLE;
+ }
+
+ mutex_lock(&mgr->lock);
+ while (remaining_size) {
+ if (tbo->page_alignment)
+ min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT;
+ else
+ min_block_size = mgr->default_page_size;
+
+ size = remaining_size;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size)
+ min_block_size = size;
+ else if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+ min_block_size = (u64)pages_per_block << PAGE_SHIFT;
+
+ BUG_ON(min_block_size < mm->chunk_size);
+
+ r = drm_buddy_alloc_blocks(mm, fpfn,
+ lpfn,
+ size,
+ min_block_size,
+ &vres->blocks,
+ vres->flags);
+
+ if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
+ vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
+ tbo->page_alignment);
+
+ continue;
+ }
+
+ if (unlikely(r))
+ goto error_free_blocks;
+
+ if (size > remaining_size)
+ remaining_size = 0;
+ else
+ remaining_size -= size;
+ }
+
+ vres->task.pid = task_pid_nr(current);
+ get_task_comm(vres->task.comm, current);
+ list_add_tail(&vres->vres_node, &mgr->allocated_vres_list);
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
+ struct drm_buddy_block *dcc_block;
+ unsigned long dcc_start;
+ u64 trim_start;
+
+ dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks);
+ /* Adjust the start address for DCC buffers only */
+ dcc_start =
+ roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block),
+ adjust_dcc_size);
+ trim_start = (u64)dcc_start;
+ drm_buddy_block_trim(mm, &trim_start,
+ (u64)vres->base.size,
+ &vres->blocks);
+ }
+ mutex_unlock(&mgr->lock);
+
+ vres->base.start = 0;
+ size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
+ vres->base.size);
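+ /*
+ * Use the highest block end minus the resource size as the nominal
+ * start page, so that res->start stays usable for visibility checks.
+ */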
+ list_for_each_entry(block, &vres->blocks, link) {
+ unsigned long start;
+
+ start = amdgpu_vram_mgr_block_start(block) +
+ amdgpu_vram_mgr_block_size(block);
+ start >>= PAGE_SHIFT;
+
+ if (start > PFN_UP(size))
+ start -= PFN_UP(size);
+ else
+ start = 0;
+ vres->base.start = max(vres->base.start, start);
+
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+ }
+
+ if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
+ vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+
+ if (adev->gmc.xgmi.connected_to_cpu)
+ vres->base.bus.caching = ttm_cached;
+ else
+ vres->base.bus.caching = ttm_write_combined;
+
+ atomic64_add(vis_usage, &mgr->vis_usage);
+ *res = &vres->base;
+ return 0;
+
+error_free_blocks:
+ drm_buddy_free_list(mm, &vres->blocks, 0);
+ mutex_unlock(&mgr->lock);
+error_fini:
+ ttm_resource_fini(man, &vres->base);
+ kfree(vres);
+
+ return r;
+}
+
+/**
+ * amdgpu_vram_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @res: TTM memory object
+ *
+ * Free the allocated VRAM again.
+ */
+static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
+ struct ttm_resource *res)
+{
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+ struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ uint64_t vis_usage = 0;
+
+ mutex_lock(&mgr->lock);
+
+ list_del(&vres->vres_node);
+ memset(&vres->task, 0, sizeof(vres->task));
+
+ list_for_each_entry(block, &vres->blocks, link)
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+ drm_buddy_free_list(mm, &vres->blocks, vres->flags);
+ amdgpu_vram_mgr_do_reserve(man);
+ mutex_unlock(&mgr->lock);
+
+ atomic64_sub(vis_usage, &mgr->vis_usage);
+
+ ttm_resource_fini(man, res);
+ kfree(vres);
+}
+
+/**
+ * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
+ *
+ * @adev: amdgpu device pointer
+ * @res: TTM memory object
+ * @offset: byte offset from the base of VRAM BO
+ * @length: number of bytes to export in sg_table
+ * @dev: the other device
+ * @dir: dma direction
+ * @sgt: resulting sg table
+ *
+ * Allocate and fill a sg table from a VRAM allocation.
+ */
+int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
+ struct ttm_resource *res,
+ u64 offset, u64 length,
+ struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table **sgt)
+{
+ struct amdgpu_res_cursor cursor;
+ struct scatterlist *sg;
+ int num_entries = 0;
+ int i, r;
+
+ *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
+ if (!*sgt)
+ return -ENOMEM;
+
+ /* Determine the number of DRM_BUDDY blocks to export */
+ amdgpu_res_first(res, offset, length, &cursor);
+ while (cursor.remaining) {
+ num_entries++;
+ amdgpu_res_next(&cursor, min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE));
+ }
+
+ r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
+ if (r)
+ goto error_free;
+
+ /* Initialize scatterlist nodes of sg_table */
+ for_each_sgtable_sg((*sgt), sg, i)
+ sg->length = 0;
+
+ /*
+ * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+ * @note: Use the iterator API to get the first DRM_BUDDY block
+ * and the number of bytes in it. Access the following
+ * DRM_BUDDY block(s) if more of the buffer needs to be exported.
+ */
+ amdgpu_res_first(res, offset, length, &cursor);
+ for_each_sgtable_sg((*sgt), sg, i) {
+ phys_addr_t phys = cursor.start + adev->gmc.aper_base;
+ unsigned long size = min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE);
+ dma_addr_t addr;
+
+ addr = dma_map_resource(dev, phys, size, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ r = dma_mapping_error(dev, addr);
+ if (r)
+ goto error_unmap;
+
+ sg_set_page(sg, NULL, size, 0);
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = size;
+
+ amdgpu_res_next(&cursor, size);
+ }
+
+ return 0;
+
+error_unmap:
+ for_each_sgtable_sg((*sgt), sg, i) {
+ if (!sg->length)
+ continue;
+
+ dma_unmap_resource(dev, sg->dma_address,
+ sg->length, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+ sg_free_table(*sgt);
+
+error_free:
+ kfree(*sgt);
+ return r;
+}
+
+/**
+ * amdgpu_vram_mgr_free_sgt - free a previously allocated sg table
+ *
+ * @dev: device pointer
+ * @dir: data direction of resource to unmap
+ * @sgt: sg table to free
+ *
+ * Free a previously allocated sg table.
+ */
+void amdgpu_vram_mgr_free_sgt(struct device *dev,
+ enum dma_data_direction dir,
+ struct sg_table *sgt)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sgtable_sg(sgt, sg, i)
+ dma_unmap_resource(dev, sg->dma_address,
+ sg->length, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+/**
+ * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part
+ *
+ * @mgr: amdgpu_vram_mgr pointer
+ *
+ * Returns how many bytes are used in the visible part of VRAM
+ */
+uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr)
+{
+ return atomic64_read(&mgr->vis_usage);
+}
+
+/**
+ * amdgpu_vram_mgr_clear_reset_blocks - reset clear blocks
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Reset the cleared drm buddy blocks.
+ */
+void amdgpu_vram_mgr_clear_reset_blocks(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct drm_buddy *mm = &mgr->mm;
+
+ mutex_lock(&mgr->lock);
+ drm_buddy_reset_clear(mm, false);
+ mutex_unlock(&mgr->lock);
+}
+
+/**
+ * amdgpu_vram_mgr_intersects - test each drm buddy block for intersection
+ *
+ * @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
+ *
+ * Test each drm buddy block for intersection for eviction decision.
+ */
+static bool amdgpu_vram_mgr_intersects(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+
+ /* Check each drm buddy block individually */
+ list_for_each_entry(block, &mgr->blocks, link) {
+ unsigned long fpfn =
+ amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+ if (place->fpfn < lpfn &&
+ (!place->lpfn || place->lpfn > fpfn))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * amdgpu_vram_mgr_compatible - test each drm buddy block for compatibility
+ *
+ * @man: TTM memory type manager
+ * @res: The resource to test
+ * @place: The place to test against
+ * @size: Size of the new allocation
+ *
+ * Test each drm buddy block for placement compatibility.
+ */
+static bool amdgpu_vram_mgr_compatible(struct ttm_resource_manager *man,
+ struct ttm_resource *res,
+ const struct ttm_place *place,
+ size_t size)
+{
+ struct amdgpu_vram_mgr_resource *mgr = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+
+ /* Check each drm buddy block individually */
+ list_for_each_entry(block, &mgr->blocks, link) {
+ unsigned long fpfn =
+ amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (amdgpu_vram_mgr_block_size(block) >> PAGE_SHIFT);
+
+ if (fpfn < place->fpfn ||
+ (place->lpfn && lpfn > place->lpfn))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * amdgpu_vram_mgr_debug - dump VRAM table
+ *
+ * @man: TTM memory type manager
+ * @printer: DRM printer to use
+ *
+ * Dump the VRAM table content to the given DRM printer.
+ */
+static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
+ struct drm_printer *printer)
+{
+ struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct drm_buddy *mm = &mgr->mm;
+ struct amdgpu_vram_reservation *rsv;
+
+ drm_printf(printer, " vis usage:%llu\n",
+ amdgpu_vram_mgr_vis_usage(mgr));
+
+ mutex_lock(&mgr->lock);
+ drm_printf(printer, "default_page_size: %lluKiB\n",
+ mgr->default_page_size >> 10);
+
+ drm_buddy_print(mm, printer);
+
+ drm_printf(printer, "reserved:\n");
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks)
+ drm_printf(printer, "%#018llx-%#018llx: %llu\n",
+ rsv->start, rsv->start + rsv->size, rsv->size);
+ mutex_unlock(&mgr->lock);
+}
+
+static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
+ .alloc = amdgpu_vram_mgr_new,
+ .free = amdgpu_vram_mgr_del,
+ .intersects = amdgpu_vram_mgr_intersects,
+ .compatible = amdgpu_vram_mgr_compatible,
+ .debug = amdgpu_vram_mgr_debug
+};
+
+/**
+ * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the VRAM manager.
+ */
+int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int err;
+
+ man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size);
+ if (IS_ERR(man->cg))
+ return PTR_ERR(man->cg);
+ ttm_resource_manager_init(man, &adev->mman.bdev,
+ adev->gmc.real_vram_size);
+
+ mutex_init(&mgr->lock);
+ INIT_LIST_HEAD(&mgr->reservations_pending);
+ INIT_LIST_HEAD(&mgr->reserved_pages);
+ INIT_LIST_HEAD(&mgr->allocated_vres_list);
+ mgr->default_page_size = PAGE_SIZE;
+
+ man->func = &amdgpu_vram_mgr_func;
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
+
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_fini - free and destroy VRAM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the VRAM manager. Teardown is skipped if resources
+ * are still allocated inside it.
+ */
+void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+ struct amdgpu_vram_reservation *rsv, *temp;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ mutex_lock(&mgr->lock);
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks)
+ kfree(rsv);
+
+ list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
+ kfree(rsv);
+ }
+ if (!adev->gmc.is_app_apu)
+ drm_buddy_fini(&mgr->mm);
+ mutex_unlock(&mgr->lock);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
new file mode 100644
index 000000000000..5f5fd9a911c2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VRAM_MGR_H__
+#define __AMDGPU_VRAM_MGR_H__
+
+#include <drm/drm_buddy.h>
+
+struct amdgpu_vram_mgr {
+ struct ttm_resource_manager manager;
+ struct drm_buddy mm;
+ /* protects access to buffer objects */
+ struct mutex lock;
+ struct list_head reservations_pending;
+ struct list_head reserved_pages;
+ atomic64_t vis_usage;
+ u64 default_page_size;
+ struct list_head allocated_vres_list;
+};
+
+struct amdgpu_vres_task {
+ pid_t pid;
+ char comm[TASK_COMM_LEN];
+};
+
+struct amdgpu_vram_block_info {
+ u64 start;
+ u64 size;
+ struct amdgpu_vres_task task;
+};
+
+struct amdgpu_vram_mgr_resource {
+ struct ttm_resource base;
+ struct list_head blocks;
+ unsigned long flags;
+ struct list_head vres_node;
+ struct amdgpu_vres_task task;
+};
+
+static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_offset(block);
+}
+
+static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
+{
+ return (u64)PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_is_clear(block);
+}
+
+static inline struct amdgpu_vram_mgr_resource *
+to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
+{
+ return container_of(res, struct amdgpu_vram_mgr_resource, base);
+}
+
+static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res)
+{
+ struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res);
+
+ WARN_ON(ares->flags & DRM_BUDDY_CLEARED);
+ ares->flags |= DRM_BUDDY_CLEARED;
+}
+
+int amdgpu_vram_mgr_query_address_block_info(struct amdgpu_vram_mgr *mgr,
+ uint64_t address, struct amdgpu_vram_block_info *info);
+
+#endif
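The two inline helpers above encode a buddy block's placement as an offset plus a power-of-two size derived from its order. A standalone sketch of that size computation; the 4 KiB page size is assumed here purely for illustration, the driver uses PAGE_SIZE.

#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096ULL	/* illustrative; the driver uses PAGE_SIZE */

/* Mirror of amdgpu_vram_mgr_block_size(): size = PAGE_SIZE << order. */
static uint64_t buddy_block_size(unsigned int order)
{
	return SKETCH_PAGE_SIZE << order;
}

int main(void)
{
	/* Order 0 is one page, order 9 is 2 MiB with 4 KiB pages. */
	printf("%llu %llu\n",
	       (unsigned long long)buddy_block_size(0),
	       (unsigned long long)buddy_block_size(9));
	return 0;
}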
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
new file mode 100644
index 000000000000..1083db8cea2e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -0,0 +1,1107 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+#include "amdgpu_drv.h"
+
+#include <drm/drm_drv.h>
+#include "../amdxcp/amdgpu_xcp_drv.h"
+
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr);
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr);
+
+static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
+{
+ int (*run_func)(void *handle, uint32_t inst_mask);
+ int ret = 0;
+
+ if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs)
+ return 0;
+
+ run_func = NULL;
+
+ switch (xcp_state) {
+ case AMDGPU_XCP_PREPARE_SUSPEND:
+ run_func = xcp_ip->ip_funcs->prepare_suspend;
+ break;
+ case AMDGPU_XCP_SUSPEND:
+ run_func = xcp_ip->ip_funcs->suspend;
+ break;
+ case AMDGPU_XCP_PREPARE_RESUME:
+ run_func = xcp_ip->ip_funcs->prepare_resume;
+ break;
+ case AMDGPU_XCP_RESUME:
+ run_func = xcp_ip->ip_funcs->resume;
+ break;
+ }
+
+ if (run_func)
+ ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask);
+
+ return ret;
+}
+
+static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ int state)
+{
+ struct amdgpu_xcp_ip *xcp_ip;
+ struct amdgpu_xcp *xcp;
+ int i, ret;
+
+ if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid)
+ return -EINVAL;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) {
+ xcp_ip = &xcp->ip[i];
+ ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_SUSPEND);
+}
+
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND);
+}
+
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id,
+ AMDGPU_XCP_PREPARE_RESUME);
+}
+
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id)
+{
+ return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME);
+}
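The four wrappers above funnel every partition through the same per-IP callback dispatch in __amdgpu_xcp_run. A condensed standalone sketch of that pattern, with stand-in state and ops names rather than the driver's types:

#include <stdio.h>

enum xcp_state { PREPARE_SUSPEND, SUSPEND, PREPARE_RESUME, RESUME };

struct ip_ops {			/* stand-in for amdgpu_xcp_ip_funcs */
	int (*prepare_suspend)(void *handle, unsigned int inst_mask);
	int (*suspend)(void *handle, unsigned int inst_mask);
	int (*prepare_resume)(void *handle, unsigned int inst_mask);
	int (*resume)(void *handle, unsigned int inst_mask);
};

/* Pick the callback for the requested transition; absent ops are a no-op. */
static int run_transition(const struct ip_ops *ops, void *handle,
			  unsigned int inst_mask, enum xcp_state state)
{
	int (*fn)(void *, unsigned int) = NULL;

	switch (state) {
	case PREPARE_SUSPEND: fn = ops->prepare_suspend; break;
	case SUSPEND:         fn = ops->suspend;         break;
	case PREPARE_RESUME:  fn = ops->prepare_resume;  break;
	case RESUME:          fn = ops->resume;          break;
	}
	return fn ? fn(handle, inst_mask) : 0;
}

static int demo_suspend(void *handle, unsigned int mask)
{
	printf("suspend instances 0x%x\n", mask);
	return 0;
}

int main(void)
{
	struct ip_ops ops = { .suspend = demo_suspend };

	return run_transition(&ops, NULL, 0x3, SUSPEND);
}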
+
+static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_xcp *xcp;
+
+ if (!ip)
+ return;
+
+ xcp = &xcp_mgr->xcp[xcp_id];
+ xcp->ip[ip->ip_id] = *ip;
+ xcp->ip[ip->ip_id].valid = true;
+
+ xcp->valid = true;
+}
+
+static void __amdgpu_xcp_set_unique_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ int xcp_id)
+{
+ struct amdgpu_xcp *xcp = &xcp_mgr->xcp[xcp_id];
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ uint32_t inst_mask;
+ uint64_t uid;
+ int i;
+
+ if (!amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask) &&
+ inst_mask) {
+ i = GET_INST(GC, (ffs(inst_mask) - 1));
+ uid = amdgpu_device_get_uid(xcp_mgr->adev->uid_info,
+ AMDGPU_UID_TYPE_XCD, i);
+ if (uid)
+ xcp->unique_id = uid;
+ }
+}
+
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ struct amdgpu_xcp_ip ip;
+ uint8_t mem_id;
+ int i, j, ret;
+
+ if (!num_xcps || num_xcps > MAX_XCP)
+ return -EINVAL;
+
+ xcp_mgr->mode = mode;
+
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].valid = false;
+
+ /* This is needed for figuring out memory id of xcp */
+ xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < num_xcps; ++i) {
+ for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) {
+ ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j,
+ &ip);
+ if (ret)
+ continue;
+
+ __amdgpu_xcp_add_block(xcp_mgr, i, &ip);
+ }
+
+ xcp_mgr->xcp[i].id = i;
+
+ if (xcp_mgr->funcs->get_xcp_mem_id) {
+ ret = xcp_mgr->funcs->get_xcp_mem_id(
+ xcp_mgr, &xcp_mgr->xcp[i], &mem_id);
+ if (ret)
+ continue;
+ else
+ xcp_mgr->xcp[i].mem_id = mem_id;
+ }
+ __amdgpu_xcp_set_unique_id(xcp_mgr, i);
+ }
+
+ xcp_mgr->num_xcps = num_xcps;
+ amdgpu_xcp_update_partition_sched_list(adev);
+
+ return 0;
+}
+
+static int __amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode)
+{
+ int ret, curr_mode, num_xcps = 0;
+
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode)
+ return 0;
+
+ mutex_lock(&xcp_mgr->xcp_lock);
+
+ curr_mode = xcp_mgr->mode;
+ /* State set to transient mode */
+ xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS;
+
+ ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps);
+
+ if (ret) {
+ /* Failed, get whatever mode it's at now */
+ if (xcp_mgr->funcs->query_partition_mode)
+ xcp_mgr->mode = amdgpu_xcp_query_partition_mode(
+ xcp_mgr, AMDGPU_XCP_FL_LOCKED);
+ else
+ xcp_mgr->mode = curr_mode;
+
+ goto out;
+ }
+ amdgpu_xcp_sysfs_entries_update(xcp_mgr);
+out:
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return ret;
+}
+
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE)
+ return -EINVAL;
+
+ if (xcp_mgr->mode == mode)
+ return 0;
+
+ return __amdgpu_xcp_switch_partition_mode(xcp_mgr, mode);
+}
+
+int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr || xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode);
+}
+
+static bool __amdgpu_xcp_is_cached_mode_valid(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode)
+ return true;
+
+ if (!amdgpu_sriov_vf(xcp_mgr->adev) &&
+ xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return true;
+
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_NONE &&
+ xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS)
+ return true;
+
+ return false;
+}
+
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int mode;
+
+ if (__amdgpu_xcp_is_cached_mode_valid(xcp_mgr))
+ return xcp_mgr->mode;
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_lock(&xcp_mgr->xcp_lock);
+ mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr);
+
+ /* First time query for VF, set the mode here */
+ if (amdgpu_sriov_vf(xcp_mgr->adev) &&
+ xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ xcp_mgr->mode = mode;
+
+ if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode)
+ dev_WARN(
+ xcp_mgr->adev->dev,
+ "Cached partition mode %d not matching with device mode %d",
+ xcp_mgr->mode, mode);
+
+ if (!(flags & AMDGPU_XCP_FL_LOCKED))
+ mutex_unlock(&xcp_mgr->xcp_lock);
+
+ return mode;
+}
+
+static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ struct drm_device *ddev;
+ int i, ret;
+
+ ddev = adev_to_drm(adev);
+
+ /* xcp #0 shares drm device setting with adev */
+ adev->xcp_mgr->xcp->ddev = ddev;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
+ if (ret == -ENOSPC) {
+ dev_warn(adev->dev,
+ "Skip xcp node #%d when out of drm node resource.", i);
+ ret = 0;
+ goto out;
+ } else if (ret) {
+ goto out;
+ }
+
+ /* Redirect all IOCTLs to the primary device */
+ adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
+ adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev;
+ adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver;
+ adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager;
+ p_ddev->render->dev = ddev;
+ p_ddev->primary->dev = ddev;
+ p_ddev->vma_offset_manager = ddev->vma_offset_manager;
+ p_ddev->driver = &amdgpu_partition_driver;
+ adev->xcp_mgr->xcp[i].ddev = p_ddev;
+
+ dev_set_drvdata(p_ddev->dev, &adev->xcp_mgr->xcp[i]);
+ }
+ ret = 0;
+out:
+ amdgpu_xcp_sysfs_entries_init(adev->xcp_mgr);
+
+ return ret;
+}
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_num_xcps,
+ struct amdgpu_xcp_mgr_funcs *xcp_funcs)
+{
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ int i;
+
+ if (!xcp_funcs || !xcp_funcs->get_ip_details)
+ return -EINVAL;
+
+ xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL);
+
+ if (!xcp_mgr)
+ return -ENOMEM;
+
+ xcp_mgr->adev = adev;
+ xcp_mgr->funcs = xcp_funcs;
+ xcp_mgr->mode = init_mode;
+ mutex_init(&xcp_mgr->xcp_lock);
+
+ if (init_mode != AMDGPU_XCP_MODE_NONE)
+ amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
+
+ adev->xcp_mgr = xcp_mgr;
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].xcp_mgr = xcp_mgr;
+
+ return amdgpu_xcp_dev_alloc(adev);
+}
+
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance)
+{
+ struct amdgpu_xcp *xcp;
+ int i, id_mask = 0;
+
+ if (ip >= AMDGPU_XCP_MAX_BLOCKS)
+ return -EINVAL;
+
+ for (i = 0; i < xcp_mgr->num_xcps; ++i) {
+ xcp = &xcp_mgr->xcp[i];
+ if ((xcp->valid) && (xcp->ip[ip].valid) &&
+ (xcp->ip[ip].inst_mask & BIT(instance)))
+ id_mask |= BIT(i);
+ }
+
+ if (!id_mask)
+ id_mask = -ENXIO;
+
+ return id_mask;
+}
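amdgpu_xcp_get_partition returns either a negative errno or a bitmask with one bit set per partition that owns the requested IP instance. A standalone sketch of how a caller might walk that mask; the helper name is illustrative, not a driver API.

#include <stdio.h>

/* Print every partition index encoded in a non-negative id_mask. */
static void print_partitions(int id_mask)
{
	int i;

	if (id_mask < 0) {
		printf("error %d\n", id_mask);
		return;
	}
	for (i = 0; i < 8; i++)		/* 8 mirrors MAX_XCP */
		if (id_mask & (1 << i))
			printf("partition %d\n", i);
}

int main(void)
{
	/* Instance shared by partitions 0 and 2 -> mask 0b101. */
	print_partitions(0x5);
	return 0;
}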
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask)
+{
+ if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid))
+ return -EINVAL;
+
+ *inst_mask = xcp->ip[ip].inst_mask;
+
+ return 0;
+}
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent)
+{
+ int i, ret;
+
+ if (!adev->xcp_mgr)
+ return 0;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
+{
+ struct drm_device *p_ddev;
+ int i;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ p_ddev = adev->xcp_mgr->xcp[i].ddev;
+ drm_dev_unplug(p_ddev);
+ p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
+ p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev;
+ p_ddev->driver = adev->xcp_mgr->xcp[i].driver;
+ p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager;
+ amdgpu_xcp_drm_dev_free(p_ddev);
+ }
+}
+
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv)
+{
+ int i;
+
+ if (!adev->xcp_mgr)
+ return 0;
+
+ fpriv->xcp_id = AMDGPU_XCP_NO_PARTITION;
+ for (i = 0; i < MAX_XCP; ++i) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
+ if (file_priv->minor == adev->xcp_mgr->xcp[i].ddev->render) {
+ if (adev->xcp_mgr->xcp[i].valid == FALSE) {
+ dev_err(adev->dev, "renderD%d partition %d not valid!",
+ file_priv->minor->index, i);
+ return -ENOENT;
+ }
+ dev_dbg(adev->dev, "renderD%d partition %d opened!",
+ file_priv->minor->index, i);
+ fpriv->xcp_id = i;
+ break;
+ }
+ }
+
+ fpriv->vm.mem_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ? -1 :
+ adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
+ return 0;
+}
+
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity)
+{
+ struct drm_gpu_scheduler *sched;
+ struct amdgpu_ring *ring;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ sched = entity->entity.rq->sched;
+ if (drm_sched_wqueue_ready(sched)) {
+ ring = to_amdgpu_ring(entity->entity.rq->sched);
+ atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
+ }
+}
+
+int amdgpu_xcp_select_scheds(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds)
+{
+ u32 sel_xcp_id;
+ int i;
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+
+ if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) {
+ u32 least_ref_cnt = ~0;
+
+ fpriv->xcp_id = 0;
+ for (i = 0; i < xcp_mgr->num_xcps; i++) {
+ u32 total_ref_cnt;
+
+ total_ref_cnt = atomic_read(&xcp_mgr->xcp[i].ref_cnt);
+ if (total_ref_cnt < least_ref_cnt) {
+ fpriv->xcp_id = i;
+ least_ref_cnt = total_ref_cnt;
+ }
+ }
+ }
+ sel_xcp_id = fpriv->xcp_id;
+
+ if (xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) {
+ *num_scheds =
+ xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds;
+ *scheds =
+ xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched;
+ atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt);
+ dev_dbg(adev->dev, "Selected partition #%d", sel_xcp_id);
+ } else {
+ dev_err(adev->dev, "Failed to schedule partition #%d.", sel_xcp_id);
+ return -ENOENT;
+ }
+
+ return 0;
+}
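When a file has no partition assigned yet, the selection above picks the partition with the fewest active references. A standalone sketch of that policy, with plain integers standing in for the per-XCP atomic counters:

#include <stdio.h>

/* Return the index of the least-referenced partition, mirroring the
 * first-fit-on-minimum scan in amdgpu_xcp_select_scheds(). */
static int pick_least_loaded(const unsigned int *ref_cnt, int num_xcps)
{
	unsigned int least = ~0u;
	int i, sel = 0;

	for (i = 0; i < num_xcps; i++) {
		if (ref_cnt[i] < least) {
			least = ref_cnt[i];
			sel = i;
		}
	}
	return sel;
}

int main(void)
{
	unsigned int refs[4] = { 3, 1, 1, 2 };

	/* Ties keep the lowest index, so partition 1 wins here. */
	printf("%d\n", pick_least_loaded(refs, 4));
	return 0;
}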
+
+static void amdgpu_set_xcp_id(struct amdgpu_device *adev,
+ uint32_t inst_idx,
+ struct amdgpu_ring *ring)
+{
+ int xcp_id;
+ enum AMDGPU_XCP_IP_BLOCK ip_blk;
+ uint32_t inst_mask;
+
+ ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id;
+ if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) ||
+ (ring->funcs->type == AMDGPU_RING_TYPE_CPER))
+ return;
+
+ inst_mask = 1 << inst_idx;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ ip_blk = AMDGPU_XCP_GFX;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ ip_blk = AMDGPU_XCP_SDMA;
+ break;
+ case AMDGPU_RING_TYPE_VCN_ENC:
+ case AMDGPU_RING_TYPE_VCN_JPEG:
+ ip_blk = AMDGPU_XCP_VCN;
+ break;
+ default:
+ dev_err(adev->dev, "Not support ring type %d!", ring->funcs->type);
+ return;
+ }
+
+ for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) {
+ if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) {
+ ring->xcp_id = xcp_id;
+ dev_dbg(adev->dev, "ring:%s xcp_id :%u", ring->name,
+ ring->xcp_id);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+ adev->gfx.enforce_isolation[xcp_id].xcp_id = xcp_id;
+ break;
+ }
+ }
+}
+
+static void amdgpu_xcp_gpu_sched_update(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned int sel_xcp_id)
+{
+ unsigned int *num_gpu_sched;
+
+ num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id]
+ .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds;
+ adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio]
+ .sched[(*num_gpu_sched)++] = &ring->sched;
+ dev_dbg(adev->dev, "%s :[%d] gpu_sched[%d][%d] = %d",
+ ring->name, sel_xcp_id, ring->funcs->type,
+ ring->hw_prio, *num_gpu_sched);
+}
+
+static int amdgpu_xcp_sched_list_update(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0);
+ memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched));
+ }
+
+ if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ return 0;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ ring = adev->rings[i];
+ if (!ring || !ring->sched.ready || ring->no_scheduler)
+ continue;
+
+ amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
+
+ /* VCN may be shared by two partitions under CPX MODE in certain
+ * configs.
+ */
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst))
+ amdgpu_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
+ }
+
+ return 0;
+}
+
+int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->num_rings; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE ||
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ amdgpu_set_xcp_id(adev, ring->xcc_id, ring);
+ else
+ amdgpu_set_xcp_id(adev, ring->me, ring);
+ }
+
+ return amdgpu_xcp_sched_list_update(adev);
+}
+
+void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ xcp_mgr->supp_xcp_modes = 0;
+
+ switch (NUM_XCC(adev->gfx.xcc_mask)) {
+ case 8:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_DPX_PARTITION_MODE) |
+ BIT(AMDGPU_QPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 6:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_TPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 4:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_DPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 2:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 1:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+
+ default:
+ break;
+ }
+}
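The switch above derives the supported compute-partition modes from the number of XCCs. The same mapping, condensed into a standalone table for illustration; the enum values below are stand-ins, not the driver's amdgpu_gfx_partition constants.

#include <stdio.h>

enum part_mode { SPX, DPX, TPX, QPX, CPX };	/* stand-in enum */

/* Supported-mode bitmask per XCC count, mirroring the switch above. */
static unsigned int supported_modes(unsigned int num_xcc)
{
	switch (num_xcc) {
	case 8: return (1u << SPX) | (1u << DPX) | (1u << QPX) | (1u << CPX);
	case 6: return (1u << SPX) | (1u << TPX) | (1u << CPX);
	case 4: return (1u << SPX) | (1u << DPX) | (1u << CPX);
	case 2:
	case 1: return (1u << SPX) | (1u << CPX);
	default: return 0;
	}
}

int main(void)
{
	/* 6 XCCs: SPX, TPX and CPX only. */
	printf("0x%x\n", supported_modes(6));
	return 0;
}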
+
+int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ /* TODO:
+ * Stop user queues and threads, and make sure GPU is empty of work.
+ */
+
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev);
+
+ return 0;
+}
+
+int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags)
+{
+ int ret = 0;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ amdgpu_amdkfd_device_probe(xcp_mgr->adev);
+ amdgpu_amdkfd_device_init(xcp_mgr->adev);
+ /* If KFD init failed, return failure */
+ if (!xcp_mgr->adev->kfd.init_complete)
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+/*====================== xcp sysfs - configuration ======================*/
+#define XCP_CFG_SYSFS_RES_ATTR_SHOW(_name) \
+ static ssize_t amdgpu_xcp_res_sysfs_##_name##_show( \
+ struct amdgpu_xcp_res_details *xcp_res, char *buf) \
+ { \
+ return sysfs_emit(buf, "%d\n", xcp_res->_name); \
+ }
+
+struct amdgpu_xcp_res_sysfs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct amdgpu_xcp_res_details *xcp_res, char *buf);
+};
+
+#define XCP_CFG_SYSFS_RES_ATTR(_name) \
+ struct amdgpu_xcp_res_sysfs_attribute xcp_res_sysfs_attr_##_name = { \
+ .attr = { .name = __stringify(_name), .mode = 0400 }, \
+ .show = amdgpu_xcp_res_sysfs_##_name##_show, \
+ }
+
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_inst)
+XCP_CFG_SYSFS_RES_ATTR(num_inst);
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_shared)
+XCP_CFG_SYSFS_RES_ATTR(num_shared);
+
+#define XCP_CFG_SYSFS_RES_ATTR_PTR(_name) xcp_res_sysfs_attr_##_name.attr
+
+static struct attribute *xcp_cfg_res_sysfs_attrs[] = {
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_inst),
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_shared), NULL
+};
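The two macros above stamp out one show() callback and one attribute definition per resource field, so new fields only need a pair of one-line invocations. A standalone sketch of the same generator pattern, with snprintf standing in for sysfs_emit and simplified stand-in types:

#include <stdio.h>
#include <string.h>

/* Stand-in struct mirroring amdgpu_xcp_res_details' two counters. */
struct res_details {
	unsigned char num_inst;
	unsigned char num_shared;
};

/* Same idea as XCP_CFG_SYSFS_RES_ATTR_SHOW: one show helper per field,
 * generated by a macro instead of written by hand. */
#define RES_ATTR_SHOW(_name)						\
	static int res_##_name##_show(const struct res_details *res,	\
				      char *buf, size_t len)		\
	{								\
		return snprintf(buf, len, "%d\n", res->_name);		\
	}

RES_ATTR_SHOW(num_inst)
RES_ATTR_SHOW(num_shared)

int main(void)
{
	struct res_details res = { .num_inst = 4, .num_shared = 1 };
	char buf[16];

	res_num_inst_show(&res, buf, sizeof(buf));
	fputs(buf, stdout);		/* prints "4" and a newline */
	res_num_shared_show(&res, buf, sizeof(buf));
	fputs(buf, stdout);		/* prints "1" and a newline */
	return 0;
}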
+
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
+static const char *nps_desc[] = {
+ [UNKNOWN_MEMORY_PARTITION_MODE] = "UNKNOWN",
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+ATTRIBUTE_GROUPS(xcp_cfg_res_sysfs);
+
+#define to_xcp_attr(x) \
+ container_of(x, struct amdgpu_xcp_res_sysfs_attribute, attr)
+#define to_xcp_res(x) container_of(x, struct amdgpu_xcp_res_details, kobj)
+
+static ssize_t xcp_cfg_res_sysfs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_res_sysfs_attribute *attribute;
+ struct amdgpu_xcp_res_details *xcp_res;
+
+ attribute = to_xcp_attr(attr);
+ xcp_res = to_xcp_res(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(xcp_res, buf);
+}
+
+static const struct sysfs_ops xcp_cfg_res_sysfs_ops = {
+ .show = xcp_cfg_res_sysfs_attr_show,
+};
+
+static const struct kobj_type xcp_cfg_res_sysfs_ktype = {
+ .sysfs_ops = &xcp_cfg_res_sysfs_ops,
+ .default_groups = xcp_cfg_res_sysfs_groups,
+};
+
+const char *xcp_res_names[] = {
+ [AMDGPU_XCP_RES_XCC] = "xcc",
+ [AMDGPU_XCP_RES_DMA] = "dma",
+ [AMDGPU_XCP_RES_DEC] = "dec",
+ [AMDGPU_XCP_RES_JPEG] = "jpeg",
+};
+
+static int amdgpu_xcp_get_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg)
+{
+ if (xcp_mgr->funcs && xcp_mgr->funcs->get_xcp_res_info)
+ return xcp_mgr->funcs->get_xcp_res_info(xcp_mgr, mode, xcp_cfg);
+
+ return -EOPNOTSUPP;
+}
+
+#define to_xcp_cfg(x) container_of(x, struct amdgpu_xcp_cfg, kobj)
+static ssize_t supported_xcp_configs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ struct amdgpu_xcp_mgr *xcp_mgr = xcp_cfg->xcp_mgr;
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_mgr || !xcp_mgr->supp_xcp_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t supported_nps_configs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_cfg || !xcp_cfg->compatible_nps_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_cfg->compatible_nps_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t xcp_config_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ amdgpu_gfx_compute_mode_desc(xcp_cfg->mode));
+}
+
+static ssize_t xcp_config_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ int mode, r;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX")))
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ else if (!strncasecmp("DPX", buf, strlen("DPX")))
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ else if (!strncasecmp("TPX", buf, strlen("TPX")))
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ else if (!strncasecmp("QPX", buf, strlen("QPX")))
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ else if (!strncasecmp("CPX", buf, strlen("CPX")))
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ else
+ return -EINVAL;
+
+ r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+
+ if (r)
+ return r;
+
+ xcp_cfg->mode = mode;
+ return size;
+}
+
+static struct kobj_attribute xcp_cfg_sysfs_mode =
+ __ATTR_RW_MODE(xcp_config, 0644);
+
+static void xcp_cfg_sysfs_release(struct kobject *kobj)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+
+ kfree(xcp_cfg);
+}
+
+static const struct kobj_type xcp_cfg_sysfs_ktype = {
+ .release = xcp_cfg_sysfs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static struct kobj_attribute supp_part_sysfs_mode =
+ __ATTR_RO(supported_xcp_configs);
+
+static struct kobj_attribute supp_nps_sysfs_mode =
+ __ATTR_RO(supported_nps_configs);
+
+static const struct attribute *xcp_attrs[] = {
+ &supp_part_sysfs_mode.attr,
+ &xcp_cfg_sysfs_mode.attr,
+ NULL,
+};
+
+static void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_res_details *xcp_res;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ int i, r, j, rid, mode;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ xcp_cfg = kzalloc(sizeof(*xcp_cfg), GFP_KERNEL);
+ if (!xcp_cfg)
+ return;
+ xcp_cfg->xcp_mgr = adev->xcp_mgr;
+
+ r = kobject_init_and_add(&xcp_cfg->kobj, &xcp_cfg_sysfs_ktype,
+ &adev->dev->kobj, "compute_partition_config");
+ if (r)
+ goto err1;
+
+ r = sysfs_create_files(&xcp_cfg->kobj, xcp_attrs);
+ if (r)
+ goto err1;
+
+ if (adev->gmc.supported_nps_modes != 0) {
+ r = sysfs_create_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ if (r) {
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ goto err1;
+ }
+ }
+
+ mode = (xcp_cfg->xcp_mgr->mode ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) ?
+ AMDGPU_SPX_PARTITION_MODE :
+ xcp_cfg->xcp_mgr->mode;
+ r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+ if (r) {
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ goto err1;
+ }
+
+ xcp_cfg->mode = mode;
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ rid = xcp_res->id;
+ r = kobject_init_and_add(&xcp_res->kobj,
+ &xcp_cfg_res_sysfs_ktype,
+ &xcp_cfg->kobj, "%s",
+ xcp_res_names[rid]);
+ if (r)
+ goto err;
+ }
+
+ adev->xcp_mgr->xcp_cfg = xcp_cfg;
+ return;
+err:
+ for (j = 0; j < i; j++) {
+ xcp_res = &xcp_cfg->xcp_res[j];
+ kobject_put(&xcp_res->kobj);
+ }
+
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+err1:
+ kobject_put(&xcp_cfg->kobj);
+}
+
+static void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_res_details *xcp_res;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ int i;
+
+ if (!adev->xcp_mgr || !adev->xcp_mgr->xcp_cfg)
+ return;
+
+ xcp_cfg = adev->xcp_mgr->xcp_cfg;
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ kobject_put(&xcp_res->kobj);
+ }
+
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ kobject_put(&xcp_cfg->kobj);
+}
+
+/*====================== xcp sysfs - data entries ======================*/
+
+#define to_xcp(x) container_of(x, struct amdgpu_xcp, kobj)
+
+static ssize_t xcp_metrics_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp *xcp = to_xcp(kobj);
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ ssize_t size;
+
+ xcp_mgr = xcp->xcp_mgr;
+ size = amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, NULL);
+ if (size <= 0)
+ return size;
+
+ if (size > PAGE_SIZE)
+ return -ENOSPC;
+
+ return amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, buf);
+}
+
+static umode_t amdgpu_xcp_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct amdgpu_xcp *xcp = to_xcp(kobj);
+
+ if (!xcp || !xcp->valid)
+ return 0;
+
+ return attr->mode;
+}
+
+static struct kobj_attribute xcp_sysfs_metrics = __ATTR_RO(xcp_metrics);
+
+static struct attribute *amdgpu_xcp_attrs[] = {
+ &xcp_sysfs_metrics.attr,
+ NULL,
+};
+
+static const struct attribute_group amdgpu_xcp_attrs_group = {
+ .attrs = amdgpu_xcp_attrs,
+ .is_visible = amdgpu_xcp_attrs_is_visible
+};
+
+static const struct kobj_type xcp_sysfs_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void amdgpu_xcp_sysfs_entries_fini(struct amdgpu_xcp_mgr *xcp_mgr, int n)
+{
+ struct amdgpu_xcp *xcp;
+
+ for (n--; n >= 0; n--) {
+ xcp = &xcp_mgr->xcp[n];
+ if (!xcp->ddev || !xcp->valid)
+ continue;
+ sysfs_remove_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ kobject_put(&xcp->kobj);
+ }
+}
+
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_xcp *xcp;
+ int i, r;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ /* Redirect all IOCTLs to the primary device */
+ xcp = &xcp_mgr->xcp[i];
+ if (!xcp->ddev)
+ break;
+ r = kobject_init_and_add(&xcp->kobj, &xcp_sysfs_ktype,
+ &xcp->ddev->dev->kobj, "xcp");
+ if (r)
+ goto out;
+
+ r = sysfs_create_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ if (r)
+ goto out;
+ }
+
+ return;
+out:
+ kobject_put(&xcp->kobj);
+}
+
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_xcp *xcp;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ /* Redirect all IOCTLs to the primary device */
+ xcp = &xcp_mgr->xcp[i];
+ if (!xcp->ddev)
+ continue;
+ sysfs_update_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ }
+
+ return;
+}
+
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!adev->xcp_mgr)
+ return;
+
+ amdgpu_xcp_cfg_sysfs_init(adev);
+
+ return;
+}
+
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->xcp_mgr)
+ return;
+ amdgpu_xcp_sysfs_entries_fini(adev->xcp_mgr, MAX_XCP);
+ amdgpu_xcp_cfg_sysfs_fini(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
new file mode 100644
index 000000000000..1928d9e224fc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_XCP_H
+#define AMDGPU_XCP_H
+
+#include <linux/pci.h>
+#include <linux/xarray.h>
+
+#include "amdgpu_ctx.h"
+
+#define MAX_XCP 8
+
+#define AMDGPU_XCP_MODE_NONE -1
+#define AMDGPU_XCP_MODE_TRANS -2
+
+#define AMDGPU_XCP_FL_NONE 0
+#define AMDGPU_XCP_FL_LOCKED (1 << 0)
+
+#define AMDGPU_XCP_NO_PARTITION (~0)
+
+#define AMDGPU_XCP_OPS_KFD (1 << 0)
+
+struct amdgpu_fpriv;
+
+enum AMDGPU_XCP_IP_BLOCK {
+ AMDGPU_XCP_GFXHUB,
+ AMDGPU_XCP_GFX,
+ AMDGPU_XCP_SDMA,
+ AMDGPU_XCP_VCN,
+ AMDGPU_XCP_MAX_BLOCKS
+};
+
+enum AMDGPU_XCP_STATE {
+ AMDGPU_XCP_PREPARE_SUSPEND,
+ AMDGPU_XCP_SUSPEND,
+ AMDGPU_XCP_PREPARE_RESUME,
+ AMDGPU_XCP_RESUME,
+};
+
+enum amdgpu_xcp_res_id {
+ AMDGPU_XCP_RES_XCC,
+ AMDGPU_XCP_RES_DMA,
+ AMDGPU_XCP_RES_DEC,
+ AMDGPU_XCP_RES_JPEG,
+ AMDGPU_XCP_RES_MAX,
+};
+
+struct amdgpu_xcp_res_details {
+ enum amdgpu_xcp_res_id id;
+ u8 num_inst;
+ u8 num_shared;
+ struct kobject kobj;
+};
+
+struct amdgpu_xcp_cfg {
+ u8 mode;
+ struct amdgpu_xcp_res_details xcp_res[AMDGPU_XCP_RES_MAX];
+ u8 num_res;
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ struct kobject kobj;
+ u16 compatible_nps_modes;
+};
+
+struct amdgpu_xcp_ip_funcs {
+ int (*prepare_suspend)(void *handle, uint32_t inst_mask);
+ int (*suspend)(void *handle, uint32_t inst_mask);
+ int (*prepare_resume)(void *handle, uint32_t inst_mask);
+ int (*resume)(void *handle, uint32_t inst_mask);
+};
+
+struct amdgpu_xcp_ip {
+ struct amdgpu_xcp_ip_funcs *ip_funcs;
+ uint32_t inst_mask;
+
+ enum AMDGPU_XCP_IP_BLOCK ip_id;
+ bool valid;
+};
+
+struct amdgpu_xcp {
+ struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS];
+
+ uint8_t id;
+ uint8_t mem_id;
+ bool valid;
+ atomic_t ref_cnt;
+ struct drm_device *ddev;
+ struct drm_device *rdev;
+ struct drm_device *pdev;
+ struct drm_driver *driver;
+ struct drm_vma_offset_manager *vma_offset_manager;
+ struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ struct kobject kobj;
+ uint64_t unique_id;
+};
+
+struct amdgpu_xcp_mgr {
+ struct amdgpu_device *adev;
+ struct mutex xcp_lock;
+ struct amdgpu_xcp_mgr_funcs *funcs;
+
+ struct amdgpu_xcp xcp[MAX_XCP];
+ uint8_t num_xcps;
+ int8_t mode;
+
+ /* Used to determine KFD memory size limits per XCP */
+ unsigned int num_xcp_per_mem_partition;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ uint32_t supp_xcp_modes;
+ uint32_t avail_xcp_modes;
+};
+
+struct amdgpu_xcp_mgr_funcs {
+ int (*switch_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr, int mode,
+ int *num_xcps);
+ int (*query_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr);
+ int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip);
+ int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id);
+ int (*get_xcp_res_info)(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg);
+ int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+ int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+};
+
+int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
+
+int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
+ int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs);
+int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode);
+int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode);
+int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr);
+int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum AMDGPU_XCP_IP_BLOCK ip, int instance);
+
+int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp,
+ enum AMDGPU_XCP_IP_BLOCK ip,
+ uint32_t *inst_mask);
+
+int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
+ const struct pci_device_id *ent);
+void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev);
+int amdgpu_xcp_open_device(struct amdgpu_device *adev,
+ struct amdgpu_fpriv *fpriv,
+ struct drm_file *file_priv);
+void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
+ struct amdgpu_ctx_entity *entity);
+int amdgpu_xcp_select_scheds(struct amdgpu_device *adev,
+ u32 hw_ip, u32 hw_prio,
+ struct amdgpu_fpriv *fpriv,
+ unsigned int *num_scheds,
+ struct drm_gpu_scheduler ***scheds);
+void amdgpu_xcp_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr);
+int amdgpu_xcp_update_partition_sched_list(struct amdgpu_device *adev);
+int amdgpu_xcp_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+int amdgpu_xcp_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags);
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev);
+
+static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ if (!xcp_mgr)
+ return 1;
+ else
+ return xcp_mgr->num_xcps;
+}
+
+static inline struct amdgpu_xcp *
+amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
+{
+ if (!xcp_mgr)
+ return NULL;
+
+ while (*from < MAX_XCP) {
+ if (xcp_mgr->xcp[*from].valid)
+ return &xcp_mgr->xcp[*from];
+ ++(*from);
+ }
+
+ return NULL;
+}
+
+#define for_each_xcp(xcp_mgr, xcp, i) \
+ for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
+ ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+
+#endif
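The for_each_xcp() helper above skips invalid partitions by letting amdgpu_get_next_xcp() advance the index until it lands on a valid slot. A standalone sketch of the same skip-invalid iteration pattern over a plain array; names and types here are illustrative only.

#include <stdbool.h>
#include <stdio.h>

#define N_SLOTS 8	/* mirrors MAX_XCP */

struct slot {
	bool valid;
	int id;
};

/* Return the next valid slot at or after *from, or NULL when exhausted. */
static struct slot *next_valid(struct slot *slots, int *from)
{
	while (*from < N_SLOTS) {
		if (slots[*from].valid)
			return &slots[*from];
		++(*from);
	}
	return NULL;
}

#define for_each_valid_slot(slots, s, i) \
	for (i = 0, s = next_valid(slots, &i); s; ++i, s = next_valid(slots, &i))

int main(void)
{
	struct slot slots[N_SLOTS] = {
		[0] = { true, 0 }, [2] = { true, 2 }, [5] = { true, 5 },
	};
	struct slot *s;
	int i;

	for_each_valid_slot(slots, s, i)
		printf("slot %d\n", s->id);	/* 0, 2, 5 */
	return 0;
}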
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
new file mode 100644
index 000000000000..aad530c46a9f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -0,0 +1,1778 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_ras.h"
+#include "soc15.h"
+#include "df/df_3_6_offset.h"
+#include "xgmi/xgmi_4_0_0_smn.h"
+#include "xgmi/xgmi_4_0_0_sh_mask.h"
+#include "xgmi/xgmi_6_1_0_sh_mask.h"
+#include "wafl/wafl2_4_0_0_smn.h"
+#include "wafl/wafl2_4_0_0_sh_mask.h"
+
+#include "amdgpu_reset.h"
+
+#define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c
+#define smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK 0x11a00218
+#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210
+#define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218
+
+#define XGMI_STATE_DISABLE 0xD1
+#define XGMI_STATE_LS0 0x81
+#define XGMI_LINK_ACTIVE 1
+#define XGMI_LINK_INACTIVE 0
+
+static DEFINE_MUTEX(xgmi_mutex);
+
+#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
+
+static LIST_HEAD(xgmi_hive_list);
+
+static const int xgmi_pcs_err_status_reg_vg20[] = {
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x100000,
+};
+
+static const int wafl_pcs_err_status_reg_vg20[] = {
+ smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS,
+ smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000,
+};
+
+static const int xgmi_pcs_err_status_reg_arct[] = {
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x100000,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x500000,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x600000,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x700000,
+ smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS + 0x800000,
+};
+
+/* same as vg20*/
+static const int wafl_pcs_err_status_reg_arct[] = {
+ smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS,
+ smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000,
+};
+
+static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x200000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x300000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x400000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x500000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x600000,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x700000
+};
+
+static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x200000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x300000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x400000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x500000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x600000,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x700000
+};
+
+static const int walf_pcs_err_status_reg_aldebaran[] = {
+ smnPCS_GOPX1_PCS_ERROR_STATUS,
+ smnPCS_GOPX1_PCS_ERROR_STATUS + 0x100000
+};
+
+static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
+ smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
+};
+
+static const int xgmi3x16_pcs_err_status_reg_v6_4[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS,
+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000
+};
+
+static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = {
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK,
+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
+};
+
+static const u64 xgmi_v6_4_0_mca_base_array[] = {
+ 0x11a09200,
+ 0x11b09200,
+};
+
+static const char *xgmi_v6_4_0_ras_error_code_ext[32] = {
+ [0x00] = "XGMI PCS DataLossErr",
+ [0x01] = "XGMI PCS TrainingErr",
+ [0x02] = "XGMI PCS FlowCtrlAckErr",
+ [0x03] = "XGMI PCS RxFifoUnderflowErr",
+ [0x04] = "XGMI PCS RxFifoOverflowErr",
+ [0x05] = "XGMI PCS CRCErr",
+ [0x06] = "XGMI PCS BERExceededErr",
+ [0x07] = "XGMI PCS TxMetaDataErr",
+ [0x08] = "XGMI PCS ReplayBufParityErr",
+ [0x09] = "XGMI PCS DataParityErr",
+ [0x0a] = "XGMI PCS ReplayFifoOverflowErr",
+ [0x0b] = "XGMI PCS ReplayFifoUnderflowErr",
+ [0x0c] = "XGMI PCS ElasticFifoOverflowErr",
+ [0x0d] = "XGMI PCS DeskewErr",
+ [0x0e] = "XGMI PCS FlowCtrlCRCErr",
+ [0x0f] = "XGMI PCS DataStartupLimitErr",
+ [0x10] = "XGMI PCS FCInitTimeoutErr",
+ [0x11] = "XGMI PCS RecoveryTimeoutErr",
+ [0x12] = "XGMI PCS ReadySerialTimeoutErr",
+ [0x13] = "XGMI PCS ReadySerialAttemptErr",
+ [0x14] = "XGMI PCS RecoveryAttemptErr",
+ [0x15] = "XGMI PCS RecoveryRelockAttemptErr",
+ [0x16] = "XGMI PCS ReplayAttemptErr",
+ [0x17] = "XGMI PCS SyncHdrErr",
+ [0x18] = "XGMI PCS TxReplayTimeoutErr",
+ [0x19] = "XGMI PCS RxReplayTimeoutErr",
+ [0x1a] = "XGMI PCS LinkSubTxTimeoutErr",
+ [0x1b] = "XGMI PCS LinkSubRxTimeoutErr",
+ [0x1c] = "XGMI PCS RxCMDPktErr",
+};
+
+static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = {
+ {"XGMI PCS DataLossErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)},
+ {"XGMI PCS TrainingErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, TrainingErr)},
+ {"XGMI PCS CRCErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, CRCErr)},
+ {"XGMI PCS BERExceededErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, BERExceededErr)},
+ {"XGMI PCS TxMetaDataErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, TxMetaDataErr)},
+ {"XGMI PCS ReplayBufParityErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayBufParityErr)},
+ {"XGMI PCS DataParityErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataParityErr)},
+ {"XGMI PCS ReplayFifoOverflowErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+ {"XGMI PCS ReplayFifoUnderflowErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+ {"XGMI PCS ElasticFifoOverflowErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+ {"XGMI PCS DeskewErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DeskewErr)},
+ {"XGMI PCS DataStartupLimitErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataStartupLimitErr)},
+ {"XGMI PCS FCInitTimeoutErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+ {"XGMI PCS RecoveryTimeoutErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+ {"XGMI PCS ReadySerialTimeoutErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+ {"XGMI PCS ReadySerialAttemptErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+ {"XGMI PCS RecoveryAttemptErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+ {"XGMI PCS RecoveryRelockAttemptErr",
+ SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+};
+
+static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
+ {"WAFL PCS DataLossErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataLossErr)},
+ {"WAFL PCS TrainingErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, TrainingErr)},
+ {"WAFL PCS CRCErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, CRCErr)},
+ {"WAFL PCS BERExceededErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, BERExceededErr)},
+ {"WAFL PCS TxMetaDataErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, TxMetaDataErr)},
+ {"WAFL PCS ReplayBufParityErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayBufParityErr)},
+ {"WAFL PCS DataParityErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataParityErr)},
+ {"WAFL PCS ReplayFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+ {"WAFL PCS ReplayFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+ {"WAFL PCS ElasticFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+ {"WAFL PCS DeskewErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DeskewErr)},
+ {"WAFL PCS DataStartupLimitErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataStartupLimitErr)},
+ {"WAFL PCS FCInitTimeoutErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+ {"WAFL PCS RecoveryTimeoutErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+ {"WAFL PCS ReadySerialTimeoutErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+ {"WAFL PCS ReadySerialAttemptErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+ {"WAFL PCS RecoveryAttemptErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+ {"WAFL PCS RecoveryRelockAttemptErr",
+ SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+};
+
+static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {
+ {"XGMI3X16 PCS DataLossErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataLossErr)},
+ {"XGMI3X16 PCS TrainingErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TrainingErr)},
+ {"XGMI3X16 PCS FlowCtrlAckErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlAckErr)},
+ {"XGMI3X16 PCS RxFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoUnderflowErr)},
+ {"XGMI3X16 PCS RxFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoOverflowErr)},
+ {"XGMI3X16 PCS CRCErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, CRCErr)},
+ {"XGMI3X16 PCS BERExceededErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, BERExceededErr)},
+ {"XGMI3X16 PCS TxVcidDataErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxVcidDataErr)},
+ {"XGMI3X16 PCS ReplayBufParityErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayBufParityErr)},
+ {"XGMI3X16 PCS DataParityErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataParityErr)},
+ {"XGMI3X16 PCS ReplayFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+ {"XGMI3X16 PCS ReplayFifoUnderflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+ {"XGMI3X16 PCS ElasticFifoOverflowErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+ {"XGMI3X16 PCS DeskewErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DeskewErr)},
+ {"XGMI3X16 PCS FlowCtrlCRCErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlCRCErr)},
+ {"XGMI3X16 PCS DataStartupLimitErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataStartupLimitErr)},
+ {"XGMI3X16 PCS FCInitTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+ {"XGMI3X16 PCS RecoveryTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+ {"XGMI3X16 PCS ReadySerialTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+ {"XGMI3X16 PCS ReadySerialAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+ {"XGMI3X16 PCS RecoveryAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+ {"XGMI3X16 PCS RecoveryRelockAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+ {"XGMI3X16 PCS ReplayAttemptErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayAttemptErr)},
+ {"XGMI3X16 PCS SyncHdrErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, SyncHdrErr)},
+ {"XGMI3X16 PCS TxReplayTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxReplayTimeoutErr)},
+ {"XGMI3X16 PCS RxReplayTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxReplayTimeoutErr)},
+ {"XGMI3X16 PCS LinkSubTxTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubTxTimeoutErr)},
+ {"XGMI3X16 PCS LinkSubRxTimeoutErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubRxTimeoutErr)},
+ {"XGMI3X16 PCS RxCMDPktErr",
+ SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)},
+};
+
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num)
+{
+ int link_map_6_4_x[8] = { 0, 3, 1, 2, 7, 6, 4, 5 };
+
+ if (adev->gmc.xgmi.num_physical_nodes <= 1)
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ if (link_num < ARRAY_SIZE(link_map_6_4_x))
+ return link_map_6_4_x[link_num];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
+
+static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+ const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 };
+ const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070,
+ 0x11b00070 };
+ u32 i, n;
+ u64 addr;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1);
+ addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n];
+ break;
+ case IP_VERSION(6, 4, 1):
+ n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1);
+ addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n];
+ break;
+ default:
+ return U32_MAX;
+ }
+
+ i = global_link_num / n;
+
+ if (!(adev->aid_mask & BIT(i)))
+ return U32_MAX;
+
+ addr += adev->asic_funcs->encode_ext_smn_addressing(i);
+
+ return RREG32_PCIE_EXT(addr);
+}
+
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+ u32 xgmi_state_reg_val;
+
+ if (adev->gmc.xgmi.num_physical_nodes <= 1)
+ return -EINVAL;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_DISABLE)
+ return -ENOLINK;
+
+ if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_LS0)
+ return XGMI_LINK_ACTIVE;
+
+ return XGMI_LINK_INACTIVE;
+}
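+
+/*
+ * Example caller pattern (illustrative sketch only):
+ *
+ *   ret = amdgpu_get_xgmi_link_status(adev, 0);
+ *   if (ret == XGMI_LINK_ACTIVE)
+ *           ...                     (link 0 is trained and usable)
+ *   else if (ret == -ENOLINK)
+ *           ...                     (link 0 is disabled)
+ *   else if (ret < 0)
+ *           ...                     (query unsupported or invalid config)
+ */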
+
+/**
+ * DOC: AMDGPU XGMI Support
+ *
+ * XGMI is a high speed interconnect that joins multiple GPU cards
+ * into a homogeneous memory space that is organized by a collective
+ * hive ID and individual node IDs, both of which are 64-bit numbers.
+ *
+ * The file xgmi_device_id contains the unique per GPU device ID and
+ * is stored in the /sys/class/drm/card${cardno}/device/ directory.
+ *
+ * Inside the device directory a sub-directory 'xgmi_hive_info' is
+ * created which contains the hive ID and the list of nodes.
+ *
+ * The hive ID is stored in:
+ * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
+ *
+ * The node information is stored in numbered directories:
+ * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
+ *
+ * Each device has its own xgmi_hive_info directory with a mirror
+ * set of node sub-directories.
+ *
+ * The XGMI memory space is built by contiguously appending the
+ * power-of-two padded VRAM space of each node.
+ *
+ */
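+
+/*
+ * Example layout (illustrative): for a two-GPU hive exposed as card0 and
+ * card1, both
+ *   /sys/class/drm/card0/device/xgmi_hive_info/xgmi_hive_id
+ *   /sys/class/drm/card1/device/xgmi_hive_info/xgmi_hive_id
+ * report the same 64-bit hive ID, while node1/xgmi_device_id and
+ * node2/xgmi_device_id under either directory report the two per-GPU
+ * node IDs.
+ */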
+
+static struct attribute amdgpu_xgmi_hive_id = {
+ .name = "xgmi_hive_id",
+ .mode = S_IRUGO
+};
+
+static struct attribute *amdgpu_xgmi_hive_attrs[] = {
+ &amdgpu_xgmi_hive_id,
+ NULL
+};
+ATTRIBUTE_GROUPS(amdgpu_xgmi_hive);
+
+static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct amdgpu_hive_info *hive = container_of(
+ kobj, struct amdgpu_hive_info, kobj);
+
+ if (attr == &amdgpu_xgmi_hive_id)
+ return snprintf(buf, PAGE_SIZE, "%llu\n", hive->hive_id);
+
+ return 0;
+}
+
+static void amdgpu_xgmi_hive_release(struct kobject *kobj)
+{
+ struct amdgpu_hive_info *hive = container_of(
+ kobj, struct amdgpu_hive_info, kobj);
+
+ amdgpu_reset_put_reset_domain(hive->reset_domain);
+ hive->reset_domain = NULL;
+
+ mutex_destroy(&hive->hive_lock);
+ kfree(hive);
+}
+
+static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
+ .show = amdgpu_xgmi_show_attrs,
+};
+
+static const struct kobj_type amdgpu_xgmi_hive_type = {
+ .release = amdgpu_xgmi_hive_release,
+ .sysfs_ops = &amdgpu_xgmi_hive_ops,
+ .default_groups = amdgpu_xgmi_hive_groups,
+};
+
+static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%llu\n", adev->gmc.xgmi.node_id);
+
+}
+
+static ssize_t amdgpu_xgmi_show_physical_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ return sysfs_emit(buf, "%u\n", adev->gmc.xgmi.physical_node_id);
+
+}
+
+static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0; i < top->num_nodes; i++)
+ sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops);
+
+ return sysfs_emit(buf, "%s\n", buf);
+}
+
+static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0; i < top->num_nodes; i++)
+ sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links);
+
+ return sysfs_emit(buf, "%s\n", buf);
+}
+
+static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i, j, size = 0;
+ int current_node;
+ /*
+ * Get the sysfs node id for the current socket and show it in the
+ * port num info output in sysfs for easy reading. It is NOT the
+ * node id retrieved from the xgmi ta.
+ */
+ for (i = 0; i < top->num_nodes; i++) {
+ if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) {
+ current_node = i;
+ break;
+ }
+ }
+
+ if (i == top->num_nodes)
+ return -EINVAL;
+
+ for (i = 0; i < top->num_nodes; i++) {
+ for (j = 0; j < top->nodes[i].num_links; j++)
+ /* node id in sysfs starts from 1 rather than 0 so +1 here */
+ size += sysfs_emit_at(buf, size, "%02x:%02x -> %02x:%02x\n", current_node + 1,
+ top->nodes[i].port_num[j].src_xgmi_port_num, i + 1,
+ top->nodes[i].port_num[j].dst_xgmi_port_num);
+ }
+
+ return size;
+}
+
+#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
+static ssize_t amdgpu_xgmi_show_error(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in;
+ uint64_t fica_out;
+ unsigned int error_count = 0;
+
+ ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200);
+ ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208);
+
+ if ((!adev->df.funcs) ||
+ (!adev->df.funcs->get_fica) ||
+ (!adev->df.funcs->set_fica))
+ return -EINVAL;
+
+ fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
+ if (fica_out != 0x1f)
+ pr_err("xGMI error counters not enabled!\n");
+
+ fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in);
+
+ if ((fica_out & 0xffff) == 2)
+ error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
+
+ adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
+
+ return sysfs_emit(buf, "%u\n", error_count);
+}
+
+
+static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
+static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL);
+static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
+static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
+static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
+static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL);
+
+static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive)
+{
+ int ret = 0;
+ char node[10] = { 0 };
+
+ /* Create xgmi device id file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_device_id);
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create device file xgmi_device_id\n");
+ return ret;
+ }
+
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_physical_id);
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create device file xgmi_physical_id\n");
+ return ret;
+ }
+
+ /* Create xgmi error file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_error);
+ if (ret)
+ pr_err("failed to create xgmi_error\n");
+
+ /* Create xgmi num hops file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_hops);
+ if (ret)
+ pr_err("failed to create xgmi_num_hops\n");
+
+ /* Create xgmi num links file */
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (ret)
+ pr_err("failed to create xgmi_num_links\n");
+
+ /* Create xgmi port num file if supported */
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+ ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num);
+ if (ret)
+ dev_err(adev->dev, "failed to create xgmi_port_num\n");
+ }
+
+ /* Create sysfs link to hive info folder on the first device */
+ if (hive->kobj.parent != (&adev->dev->kobj)) {
+ ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,
+ "xgmi_hive_info");
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create link to hive info");
+ goto remove_file;
+ }
+ }
+
+ sprintf(node, "node%d", atomic_read(&hive->number_devices));
+ /* Create sysfs link from the hive folder back to this device */
+ ret = sysfs_create_link(&hive->kobj, &adev->dev->kobj, node);
+ if (ret) {
+ dev_err(adev->dev, "XGMI: Failed to create link from hive info");
+ goto remove_link;
+ }
+
+ goto success;
+
+
+remove_link:
+ sysfs_remove_link(&adev->dev->kobj, adev_to_drm(adev)->unique);
+
+remove_file:
+ device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_physical_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+ device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
+
+success:
+ return ret;
+}
+
+static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive)
+{
+ char node[10];
+ memset(node, 0, sizeof(node));
+
+ device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_physical_id);
+ device_remove_file(adev->dev, &dev_attr_xgmi_error);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
+ device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
+ if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG)
+ device_remove_file(adev->dev, &dev_attr_xgmi_port_num);
+
+ if (hive->kobj.parent != (&adev->dev->kobj))
+ sysfs_remove_link(&adev->dev->kobj, "xgmi_hive_info");
+
+ sprintf(node, "node%d", atomic_read(&hive->number_devices));
+ sysfs_remove_link(&hive->kobj, node);
+
+}
+
+
+
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive = NULL;
+ int ret;
+
+ if (!adev->gmc.xgmi.hive_id)
+ return NULL;
+
+ if (adev->hive) {
+ kobject_get(&adev->hive->kobj);
+ return adev->hive;
+ }
+
+ mutex_lock(&xgmi_mutex);
+
+ list_for_each_entry(hive, &xgmi_hive_list, node) {
+ if (hive->hive_id == adev->gmc.xgmi.hive_id)
+ goto pro_end;
+ }
+
+ hive = kzalloc(sizeof(*hive), GFP_KERNEL);
+ if (!hive) {
+ dev_err(adev->dev, "XGMI: allocation failed\n");
+ ret = -ENOMEM;
+ hive = NULL;
+ goto pro_end;
+ }
+
+ /* initialize the new hive if it does not already exist */
+ ret = kobject_init_and_add(&hive->kobj,
+ &amdgpu_xgmi_hive_type,
+ &adev->dev->kobj,
+ "%s", "xgmi_hive_info");
+ if (ret) {
+ dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
+ kobject_put(&hive->kobj);
+ hive = NULL;
+ goto pro_end;
+ }
+
+ /**
+ * Only init hive->reset_domain for non-SRIOV configurations. For SRIOV,
+ * the host driver decides how to reset the GPU, either through FLR or
+ * chain reset. The guest side will get individual notifications from the
+ * host for the FLR if necessary.
+ */
+ if (!amdgpu_sriov_vf(adev)) {
+ /**
+ * Avoid recreating the reset domain when the hive is reconstructed for
+ * the case of resetting the devices in the XGMI hive during probe for
+ * passthrough GPUs.
+ * See https://www.spinics.net/lists/amd-gfx/msg58836.html
+ */
+ if (adev->reset_domain->type != XGMI_HIVE) {
+ hive->reset_domain =
+ amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");
+ if (!hive->reset_domain) {
+ dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
+ ret = -ENOMEM;
+ kobject_put(&hive->kobj);
+ hive = NULL;
+ goto pro_end;
+ }
+ } else {
+ amdgpu_reset_get_reset_domain(adev->reset_domain);
+ hive->reset_domain = adev->reset_domain;
+ }
+ }
+
+ hive->hive_id = adev->gmc.xgmi.hive_id;
+ INIT_LIST_HEAD(&hive->device_list);
+ INIT_LIST_HEAD(&hive->node);
+ mutex_init(&hive->hive_lock);
+ atomic_set(&hive->number_devices, 0);
+ task_barrier_init(&hive->tb);
+ hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
+ hive->hi_req_gpu = NULL;
+ atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+
+ /*
+ * The hive pstate on boot is high in vega20, so we have to go to the
+ * low pstate after boot.
+ */
+ hive->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE;
+ list_add_tail(&hive->node, &xgmi_hive_list);
+
+pro_end:
+ if (hive)
+ kobject_get(&hive->kobj);
+ mutex_unlock(&xgmi_mutex);
+ return hive;
+}
+
+void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive)
+{
+ if (hive)
+ kobject_put(&hive->kobj);
+}
+
+int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
+{
+ int ret = 0;
+ struct amdgpu_hive_info *hive;
+ struct amdgpu_device *request_adev;
+ bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
+ bool init_low;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (!hive)
+ return 0;
+
+ request_adev = hive->hi_req_gpu ? hive->hi_req_gpu : adev;
+ init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
+ amdgpu_put_xgmi_hive(hive);
+ /* fw bug so temporarily disable pstate switching */
+ return 0;
+
+ if (!hive || adev->asic_type != CHIP_VEGA20)
+ return 0;
+
+ mutex_lock(&hive->hive_lock);
+
+ if (is_hi_req)
+ hive->hi_req_count++;
+ else
+ hive->hi_req_count--;
+
+ /*
+ * Vega20 only needs single peer to request pstate high for the hive to
+ * go high but all peers must request pstate low for the hive to go low
+ */
+ if (hive->pstate == pstate ||
+ (!is_hi_req && hive->hi_req_count && !init_low))
+ goto out;
+
+ dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate);
+
+ ret = amdgpu_dpm_set_xgmi_pstate(request_adev, pstate);
+ if (ret) {
+ dev_err(request_adev->dev,
+ "XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
+ request_adev->gmc.xgmi.node_id,
+ request_adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ if (init_low)
+ hive->pstate = hive->hi_req_count ?
+ hive->pstate : AMDGPU_XGMI_PSTATE_MIN;
+ else {
+ hive->pstate = pstate;
+ hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ?
+ adev : NULL;
+ }
+out:
+ mutex_unlock(&hive->hive_lock);
+ return ret;
+}
+
+int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ /* Each psp needs to set the latest topology */
+ ret = psp_xgmi_set_topology_info(&adev->psp,
+ atomic_read(&hive->number_devices),
+ &adev->psp.xgmi_context.top_info);
+ if (ret)
+ dev_err(adev->dev,
+ "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
+ adev->gmc.xgmi.node_id,
+ adev->gmc.xgmi.hive_id, ret);
+
+ return ret;
+}
+
+
+/*
+ * NOTE psp_xgmi_node_info.num_hops layout is as follows:
+ * num_hops[7:6] = link type (0 = xGMI2, 1 = xGMI3, 2/3 = reserved)
+ * num_hops[5:3] = reserved
+ * num_hops[2:0] = number of hops
+ */
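+/*
+ * Decode sketch for the layout above (mask values mirror the comment):
+ *
+ *   link_type = (num_hops >> 6) & 0x3;
+ *   hop_count = num_hops & 0x7;
+ *
+ * e.g. a raw num_hops of 0x41 decodes to link type 1 (xGMI3) and 1 hop.
+ */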
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ uint8_t num_hops_mask = 0x7;
+ int i;
+
+ if (!adev->gmc.xgmi.supported)
+ return 0;
+
+ for (i = 0 ; i < top->num_nodes; ++i)
+ if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+ return top->nodes[i].num_hops & num_hops_mask;
+
+ dev_err(adev->dev, "Failed to get xgmi hops count for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+
+ return 0;
+}
+
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw)
+{
+ bool peer_mode = bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER;
+ int unit_scale = bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES ? 1000 : 1;
+ int num_lanes = adev->gmc.xgmi.max_width;
+ int speed = adev->gmc.xgmi.max_speed;
+ int num_links = !peer_mode ? 1 : -1;
+
+ if (!(min_bw && max_bw))
+ return -EINVAL;
+
+ *min_bw = 0;
+ *max_bw = 0;
+
+ if (!adev->gmc.xgmi.supported)
+ return -ENODATA;
+
+ if (peer_mode && !peer_adev)
+ return -EINVAL;
+
+ if (peer_mode) {
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i) {
+ if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id)
+ continue;
+
+ num_links = top->nodes[i].num_links;
+ break;
+ }
+ }
+
+ if (num_links == -1) {
+ dev_err(adev->dev, "Failed to get number of xgmi links for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+ } else if (num_links) {
+ int per_link_bw = (speed * num_lanes * unit_scale)/BITS_PER_BYTE;
+
+ *min_bw = per_link_bw;
+ *max_bw = num_links * per_link_bw;
+ }
+
+ return 0;
+}
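+
+/*
+ * Worked example for the per-link formula above: with max_speed = 32 GT/s
+ * and max_width = 16 lanes (the values amdgpu_xgmi_early_init() picks for
+ * GC 9.4.3/9.4.4/9.5.0), AMDGPU_XGMI_BW_UNIT_GBYTES gives
+ * per_link_bw = (32 * 16 * 1) / 8 = 64 GB/s, so a peer reachable over
+ * four links reports min_bw = 64 and max_bw = 256.
+ */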
+
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ /* Sharing should always be enabled for non-SRIOV. */
+ if (!amdgpu_sriov_vf(adev))
+ return true;
+
+ for (i = 0 ; i < top->num_nodes; ++i)
+ if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+ return !!top->nodes[i].is_sharing_enabled;
+
+ return false;
+}
+
+/*
+ * Devices that support extended data require the entire hive to initialize with
+ * the shared memory buffer flag set.
+ *
+ * Hive locks and conditions apply - see amdgpu_xgmi_add_device
+ */
+static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_info *hive,
+ bool set_extended_data)
+{
+ struct amdgpu_device *tmp_adev;
+ int ret;
+
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_initialize(&tmp_adev->psp, set_extended_data, false);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Failed to initialize xgmi session for data partition %i\n",
+ set_extended_data);
+ return ret;
+ }
+
+ }
+
+ return 0;
+}
+
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
+{
+ struct psp_xgmi_topology_info *top_info;
+ struct amdgpu_hive_info *hive;
+ struct amdgpu_xgmi *entry;
+ struct amdgpu_device *tmp_adev = NULL;
+
+ int count = 0, ret = 0;
+
+ if (!adev->gmc.xgmi.supported)
+ return 0;
+
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ ret = psp_xgmi_initialize(&adev->psp, false, true);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Failed to initialize xgmi session\n");
+ return ret;
+ }
+
+ ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Failed to get hive id\n");
+ return ret;
+ }
+
+ ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Failed to get node id\n");
+ return ret;
+ }
+ } else {
+ adev->gmc.xgmi.hive_id = 16;
+ adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16;
+ }
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (!hive) {
+ ret = -EINVAL;
+ dev_err(adev->dev,
+ "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",
+ adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id);
+ goto exit;
+ }
+ mutex_lock(&hive->hive_lock);
+
+ top_info = &adev->psp.xgmi_context.top_info;
+
+ list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
+ list_for_each_entry(entry, &hive->device_list, head)
+ top_info->nodes[count++].node_id = entry->node_id;
+ top_info->num_nodes = count;
+ atomic_set(&hive->number_devices, count);
+
+ task_barrier_add_task(&hive->tb);
+
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ /* update the node list for the other devices in the hive */
+ if (tmp_adev != adev) {
+ top_info = &tmp_adev->psp.xgmi_context.top_info;
+ top_info->nodes[count - 1].node_id =
+ adev->gmc.xgmi.node_id;
+ top_info->num_nodes = count;
+ }
+ ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
+ if (ret)
+ goto exit_unlock;
+ }
+
+ if (amdgpu_sriov_vf(adev) &&
+ adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+ /* only get topology for the VF being initialized if it supports full duplex */
+ ret = psp_xgmi_get_topology_info(&adev->psp, count,
+ &adev->psp.xgmi_context.top_info, false);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+ adev->gmc.xgmi.node_id,
+ adev->gmc.xgmi.hive_id, ret);
+ /* TODO: continue with some nodes failed or disable the whole hive */
+ goto exit_unlock;
+ }
+ } else {
+ /* get latest topology info for each device from psp */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info, false);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ /* TODO: continue with some nodes failed or disable the whole hive */
+ goto exit_unlock;
+ }
+ }
+ }
+
+ /* get topology again for hives that support extended data */
+ if (adev->psp.xgmi_context.supports_extended_data) {
+
+ /* initialize the hive to get extended data. */
+ ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, true);
+ if (ret)
+ goto exit_unlock;
+
+ /* get the extended data. */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info, true);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.node_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ goto exit_unlock;
+ }
+ }
+
+ /* initialize the hive to get non-extended data for the next round. */
+ ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, false);
+ if (ret)
+ goto exit_unlock;
+
+ }
+ }
+
+ if (!ret)
+ ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
+
+exit_unlock:
+ mutex_unlock(&hive->hive_lock);
+exit:
+ if (!ret) {
+ adev->hive = hive;
+ dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
+ adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
+ } else {
+ amdgpu_put_xgmi_hive(hive);
+ dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n",
+ adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
+ ret);
+ }
+
+ return ret;
+}
+
+int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive = adev->hive;
+
+ if (!adev->gmc.xgmi.supported)
+ return -EINVAL;
+
+ if (!hive)
+ return -EINVAL;
+
+ mutex_lock(&hive->hive_lock);
+ task_barrier_rem_task(&hive->tb);
+ amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
+ if (hive->hi_req_gpu == adev)
+ hive->hi_req_gpu = NULL;
+ list_del(&adev->gmc.xgmi.head);
+ mutex_unlock(&hive->hive_lock);
+
+ amdgpu_put_xgmi_hive(hive);
+ adev->hive = NULL;
+
+ if (atomic_dec_return(&hive->number_devices) == 0) {
+ /* Remove the hive from global hive list */
+ mutex_lock(&xgmi_mutex);
+ list_del(&hive->node);
+ mutex_unlock(&xgmi_mutex);
+
+ amdgpu_put_xgmi_hive(hive);
+ }
+
+ return 0;
+}
+
+static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct amdgpu_device *adev = handle->adev;
+ struct aca_bank_info info;
+ const char *error_str;
+ u64 status, count;
+ int ret, ext_error_code;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ status = bank->regs[ACA_REG_IDX_STATUS];
+ ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+
+ error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ?
+ xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL;
+ if (error_str)
+ dev_info(adev->dev, "%s detected\n", error_str);
+
+ count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ if (ext_error_code != 0 && ext_error_code != 9)
+ count = 0ULL;
+
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count);
+ break;
+ case ACA_SMU_TYPE_CE:
+ count = ext_error_code == 6 ? count : 0ULL;
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = {
+ .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser,
+};
+
+static const struct aca_info xgmi_v6_4_0_aca_info = {
+ .hwip = ACA_HWIP_TYPE_PCS_XGMI,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
+ .bank_ops = &xgmi_v6_4_0_aca_bank_ops,
+};
+
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ if (!adev->gmc.xgmi.supported ||
+ adev->gmc.xgmi.num_physical_nodes == 0)
+ return 0;
+
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL,
+ &xgmi_v6_4_0_aca_info, NULL);
+ if (r)
+ goto late_fini;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
+ uint64_t addr)
+{
+ struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi;
+ return (addr + xgmi->physical_node_id * xgmi->node_segment_size);
+}
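+
+/*
+ * e.g. on physical node 2 a node-local address A maps to
+ * A + 2 * node_segment_size in the hive-wide address space.
+ */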
+
+static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg)
+{
+ WREG32_PCIE(pcs_status_reg, 0xFFFFFFFF);
+ WREG32_PCIE(pcs_status_reg, 0);
+}
+
+static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++)
+ pcs_clear_status(adev,
+ xgmi_pcs_err_status_reg_arct[i]);
+ break;
+ case CHIP_VEGA20:
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++)
+ pcs_clear_status(adev,
+ xgmi_pcs_err_status_reg_vg20[i]);
+ break;
+ case CHIP_ALDEBARAN:
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++)
+ pcs_clear_status(adev,
+ xgmi3x16_pcs_err_status_reg_aldebaran[i]);
+ for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++)
+ pcs_clear_status(adev,
+ walf_pcs_err_status_reg_aldebaran[i]);
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++)
+ pcs_clear_status(adev,
+ xgmi3x16_pcs_err_status_reg_v6_4[i]);
+ break;
+ default:
+ break;
+ }
+}
+
+static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base)
+{
+ WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+}
+
+static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++)
+ __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]);
+}
+
+static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ int i;
+
+ for_each_inst(i, adev->aid_mask)
+ xgmi_v6_4_0_reset_error_count(adev, i);
+}
+
+static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ xgmi_v6_4_0_reset_ras_error_count(adev);
+ break;
+ default:
+ amdgpu_xgmi_legacy_reset_ras_error_count(adev);
+ break;
+ }
+}
+
+static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
+ uint32_t value,
+ uint32_t mask_value,
+ uint32_t *ue_count,
+ uint32_t *ce_count,
+ bool is_xgmi_pcs,
+ bool check_mask)
+{
+ int i;
+ int ue_cnt = 0;
+ const struct amdgpu_pcs_ras_field *pcs_ras_fields = NULL;
+ uint32_t field_array_size = 0;
+
+ if (is_xgmi_pcs) {
+ if (amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 1, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 1)) {
+ pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields);
+ } else {
+ pcs_ras_fields = &xgmi_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(xgmi_pcs_ras_fields);
+ }
+ } else {
+ pcs_ras_fields = &wafl_pcs_ras_fields[0];
+ field_array_size = ARRAY_SIZE(wafl_pcs_ras_fields);
+ }
+
+ if (check_mask)
+ value = value & ~mask_value;
+
+ /* query xgmi/wafl pcs error status,
+ * only ue is supported */
+ for (i = 0; value && i < field_array_size; i++) {
+ ue_cnt = (value &
+ pcs_ras_fields[i].pcs_err_mask) >>
+ pcs_ras_fields[i].pcs_err_shift;
+ if (ue_cnt) {
+ dev_info(adev->dev, "%s detected\n",
+ pcs_ras_fields[i].err_name);
+ *ue_count += ue_cnt;
+ }
+
+ /* reset bit value if the bit is checked */
+ value &= ~(pcs_ras_fields[i].pcs_err_mask);
+ }
+
+ return 0;
+}
+
+static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ int i, supported = 1;
+ uint32_t data, mask_data = 0;
+ uint32_t ue_cnt = 0, ce_cnt = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
+ return;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_ARCTURUS:
+ /* check xgmi pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++) {
+ data = RREG32_PCIE(xgmi_pcs_err_status_reg_arct[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, false);
+ }
+ /* check wafl pcs error */
+ for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_arct); i++) {
+ data = RREG32_PCIE(wafl_pcs_err_status_reg_arct[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, false);
+ }
+ break;
+ case CHIP_VEGA20:
+ /* check xgmi pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++) {
+ data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, false);
+ }
+ /* check wafl pcs error */
+ for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_vg20); i++) {
+ data = RREG32_PCIE(wafl_pcs_err_status_reg_vg20[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, false);
+ }
+ break;
+ case CHIP_ALDEBARAN:
+ /* check xgmi3x16 pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) {
+ data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]);
+ mask_data =
+ RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_aldebaran[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, true);
+ }
+ /* check wafl pcs error */
+ for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran); i++) {
+ data = RREG32_PCIE(walf_pcs_err_status_reg_aldebaran[i]);
+ mask_data =
+ RREG32_PCIE(walf_pcs_err_noncorrectable_mask_reg_aldebaran[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, false, true);
+ }
+ break;
+ default:
+ supported = 0;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ /* check xgmi3x16 pcs error */
+ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) {
+ data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]);
+ mask_data =
+ RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]);
+ if (data)
+ amdgpu_xgmi_query_pcs_error_status(adev, data,
+ mask_data, &ue_cnt, &ce_cnt, true, true);
+ }
+ break;
+ default:
+ if (!supported)
+ dev_warn(adev->dev, "XGMI RAS error query not supported");
+ break;
+ }
+
+ amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL);
+
+ err_data->ue_count += ue_cnt;
+ err_data->ce_count += ce_cnt;
+}
+
+static enum aca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status)
+{
+ const char *error_str;
+ int ext_error_code;
+
+ ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+
+ error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ?
+ xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL;
+ if (error_str)
+ dev_info(adev->dev, "%s detected\n", error_str);
+
+ switch (ext_error_code) {
+ case 0:
+ return ACA_ERROR_TYPE_UE;
+ case 6:
+ return ACA_ERROR_TYPE_CE;
+ default:
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
+
+static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info,
+ u64 mca_base, struct ras_err_data *err_data)
+{
+ int xgmi_inst = mcm_info->die_id;
+ u64 status = 0;
+
+ status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS);
+ if (!ACA_REG__STATUS__VAL(status))
+ return;
+
+ switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) {
+ case ACA_ERROR_TYPE_UE:
+ amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL);
+ break;
+ case ACA_ERROR_TYPE_CE:
+ amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL);
+ break;
+ default:
+ break;
+ }
+
+ WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL);
+}
+
+static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data)
+{
+ struct amdgpu_smuio_mcm_config_info mcm_info = {
+ .socket_id = adev->smuio.funcs->get_socket_id(adev),
+ .die_id = xgmi_inst,
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++)
+ __xgmi_v6_4_0_query_error_count(adev, &mcm_info, xgmi_v6_4_0_mca_base_array[i], err_data);
+}
+
+static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ int i;
+
+ for_each_inst(i, adev->aid_mask)
+ xgmi_v6_4_0_query_error_count(adev, i, err_data);
+}
+
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status);
+ break;
+ default:
+ amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status);
+ break;
+ }
+}
+
+/* Trigger XGMI/WAFL error */
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+ void *inject_if, uint32_t instance_mask)
+{
+ int ret1, ret2;
+ struct ta_ras_trigger_error_input *block_info =
+ (struct ta_ras_trigger_error_input *)inject_if;
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+ dev_warn(adev->dev, "Failed to disallow df cstate");
+
+ ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DISALLOW);
+ if (ret1 && ret1 != -EOPNOTSUPP)
+ dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+ ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask);
+
+ if (amdgpu_ras_intr_triggered())
+ return ret2;
+
+ ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DEFAULT);
+ if (ret1 && ret1 != -EOPNOTSUPP)
+ dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+ if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+ dev_warn(adev->dev, "Failed to allow df cstate");
+
+ return ret2;
+}
+
+struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
+ .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
+ .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+ .ras_error_inject = amdgpu_ras_error_inject_xgmi,
+};
+
+struct amdgpu_xgmi_ras xgmi_ras = {
+ .ras_block = {
+ .hw_ops = &xgmi_ras_hw_ops,
+ .ras_late_init = amdgpu_xgmi_ras_late_init,
+ },
+};
+
+int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_xgmi_ras *ras;
+
+ if (!adev->gmc.xgmi.ras)
+ return 0;
+
+ ras = adev->gmc.xgmi.ras;
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register xgmi_wafl_pcs ras block!\n");
+ return err;
+ }
+
+ strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->gmc.xgmi.ras_if = &ras->ras_block.ras_comm;
+
+ return 0;
+}
+
+static void amdgpu_xgmi_reset_on_init_work(struct work_struct *work)
+{
+ struct amdgpu_hive_info *hive =
+ container_of(work, struct amdgpu_hive_info, reset_on_init_work);
+ struct amdgpu_reset_context reset_context;
+ struct amdgpu_device *tmp_adev;
+ struct list_head device_list;
+ int r;
+
+ mutex_lock(&hive->hive_lock);
+
+ INIT_LIST_HEAD(&device_list);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+
+ tmp_adev = list_first_entry(&device_list, struct amdgpu_device,
+ reset_list);
+ amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+
+ reset_context.method = AMD_RESET_METHOD_ON_INIT;
+ reset_context.reset_req_dev = tmp_adev;
+ reset_context.hive = hive;
+ reset_context.reset_device_list = &device_list;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ amdgpu_reset_do_xgmi_reset_on_init(&reset_context);
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ r = amdgpu_ras_init_badpage_info(tmp_adev);
+ if (r && r != -EHWPOISON)
+ dev_err(tmp_adev->dev,
+ "error during bad page data initialization");
+ }
+}
+
+static void amdgpu_xgmi_schedule_reset_on_init(struct amdgpu_hive_info *hive)
+{
+ INIT_WORK(&hive->reset_on_init_work, amdgpu_xgmi_reset_on_init_work);
+ amdgpu_reset_domain_schedule(hive->reset_domain,
+ &hive->reset_on_init_work);
+}
+
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive;
+ bool reset_scheduled;
+ int num_devs;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (!hive)
+ return -EINVAL;
+
+ mutex_lock(&hive->hive_lock);
+ num_devs = atomic_read(&hive->number_devices);
+ reset_scheduled = false;
+ if (num_devs == adev->gmc.xgmi.num_physical_nodes) {
+ amdgpu_xgmi_schedule_reset_on_init(hive);
+ reset_scheduled = true;
+ }
+
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+
+ if (reset_scheduled)
+ flush_work(&hive->reset_on_init_work);
+
+ return 0;
+}
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive,
+ int req_nps_mode)
+{
+ struct amdgpu_device *tmp_adev;
+ int cur_nps_mode, r;
+
+ /* This is expected to be called only during driver unload. The
+ * request needs to be placed only once for all devices in the hive. If
+ * one of them fails, revert the request for the previously successful
+ * devices. After placing the request, set the hive mode to UNKNOWN so
+ * that other devices don't request anymore.
+ */
+ mutex_lock(&hive->hive_lock);
+ if (atomic_read(&hive->requested_nps_mode) ==
+ UNKNOWN_MEMORY_PARTITION_MODE) {
+ dev_dbg(adev->dev, "Unexpected entry for hive NPS change");
+ mutex_unlock(&hive->hive_lock);
+ return 0;
+ }
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ r = adev->gmc.gmc_funcs->request_mem_partition_mode(
+ tmp_adev, req_nps_mode);
+ if (r)
+ break;
+ }
+ if (r) {
+ /* Request back current mode if one of the requests failed */
+ cur_nps_mode =
+ adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);
+ list_for_each_entry_continue_reverse(
+ tmp_adev, &hive->device_list, gmc.xgmi.head)
+ adev->gmc.gmc_funcs->request_mem_partition_mode(
+ tmp_adev, cur_nps_mode);
+ }
+ /* Set to UNKNOWN so that other devices don't request anymore */
+ atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+ mutex_unlock(&hive->hive_lock);
+
+ return r;
+}
+
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev)
+{
+ return (amdgpu_use_xgmi_p2p && adev != bo_adev &&
+ adev->gmc.xgmi.hive_id &&
+ adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
+}
+
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.xgmi.supported)
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ /* 25 GT/s */
+ adev->gmc.xgmi.max_speed = 25;
+ adev->gmc.xgmi.max_width = 16;
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ /* 32 GT/s */
+ adev->gmc.xgmi.max_speed = 32;
+ adev->gmc.xgmi.max_width = 16;
+ break;
+ default:
+ break;
+ }
+}
+
+void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
+ uint16_t max_speed, uint8_t max_width)
+{
+ adev->gmc.xgmi.max_speed = max_speed;
+ adev->gmc.xgmi.max_width = max_width;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
new file mode 100644
index 000000000000..5f36aff17e79
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_XGMI_H__
+#define __AMDGPU_XGMI_H__
+
+#include <drm/task_barrier.h>
+#include "amdgpu_ras.h"
+
+struct amdgpu_hive_info {
+ struct kobject kobj;
+ uint64_t hive_id;
+ struct list_head device_list;
+ struct list_head node;
+ atomic_t number_devices;
+ struct mutex hive_lock;
+ int hi_req_count;
+ struct amdgpu_device *hi_req_gpu;
+ struct task_barrier tb;
+ enum {
+ AMDGPU_XGMI_PSTATE_MIN,
+ AMDGPU_XGMI_PSTATE_MAX_VEGA20,
+ AMDGPU_XGMI_PSTATE_UNKNOWN
+ } pstate;
+
+ struct amdgpu_reset_domain *reset_domain;
+ atomic_t ras_recovery;
+ struct ras_event_manager event_mgr;
+ struct work_struct reset_on_init_work;
+ atomic_t requested_nps_mode;
+};
+
+struct amdgpu_pcs_ras_field {
+ const char *err_name;
+ uint32_t pcs_err_mask;
+ uint32_t pcs_err_shift;
+};
+
+/**
+ * Bandwidth range reporting comes in two modes.
+ *
+ * PER_LINK - range for any xgmi link
+ * PER_PEER - range from the max of a single xgmi link to the max across all links to the source peer
+ */
+enum amdgpu_xgmi_bw_mode {
+ AMDGPU_XGMI_BW_MODE_PER_LINK = 0,
+ AMDGPU_XGMI_BW_MODE_PER_PEER
+};
+
+enum amdgpu_xgmi_bw_unit {
+ AMDGPU_XGMI_BW_UNIT_GBYTES = 0,
+ AMDGPU_XGMI_BW_UNIT_MBYTES
+};
+
+struct amdgpu_xgmi_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+extern struct amdgpu_xgmi_ras xgmi_ras;
+
+struct amdgpu_xgmi {
+ /* from psp */
+ u64 node_id;
+ u64 hive_id;
+ /* fixed per family */
+ u64 node_segment_size;
+ /* physical node (0-3) */
+ unsigned physical_node_id;
+ /* number of nodes (0-4) */
+ unsigned num_physical_nodes;
+ /* gpu list in the same hive */
+ struct list_head head;
+ bool supported;
+ struct ras_common_if *ras_if;
+ bool connected_to_cpu;
+ struct amdgpu_xgmi_ras *ras;
+ uint16_t max_speed;
+ uint8_t max_width;
+};
+
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
+void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
+int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
+int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
+int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw);
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
+uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
+ uint64_t addr);
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev);
+int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive,
+ int req_nps_mode);
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
+ int global_link_num);
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num);
+
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev);
+uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev);
+
+void amgpu_xgmi_set_max_speed_width(struct amdgpu_device *adev,
+ uint16_t max_speed, uint8_t max_width);
+
+/* Cleanup macro for use with __free(xgmi_put_hive) */
+DEFINE_FREE(xgmi_put_hive, struct amdgpu_hive_info *, if (_T) amdgpu_put_xgmi_hive(_T))
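+
+/*
+ * Example usage of the cleanup helper (minimal sketch; the -ENODEV return
+ * below is only illustrative):
+ *
+ *   struct amdgpu_hive_info *hive __free(xgmi_put_hive) =
+ *           amdgpu_get_xgmi_hive(adev);
+ *
+ *   if (!hive)
+ *           return -ENODEV;
+ *
+ * The hive reference is dropped automatically on every return path.
+ */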
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
new file mode 100644
index 000000000000..3cdb1e0eca37
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -0,0 +1,516 @@
+/*
+ * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef AMDGV_SRIOV_MSG__H_
+#define AMDGV_SRIOV_MSG__H_
+
+#define AMD_SRIOV_MSG_SIZE_KB 1
+
+/*
+ * layout v1
+ * 0 64KB 65KB 66KB 68KB 132KB
+ * | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 64KB | 1KB | 1KB | 2KB | 64KB | ...
+ */
+
+/*
+ * layout v2 (offsets are dynamically allocated and the offsets below are examples)
+ * 0 1KB 64KB 65KB 66KB 68KB 132KB
+ * | INITD_H | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 1KB | 64KB | 1KB | 1KB | 2KB | 64KB | ...
+ *
+ * Note: PF2VF + VF2PF + Bad Page = DataExchange region (allocated contiguously)
+ */
+
+/* v1 layout sizes */
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 1
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1 2
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_SIZE_KB_V1 + AMD_SRIOV_MSG_VF2PF_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+
+/* v1 offsets */
+#define AMD_SRIOV_MSG_VBIOS_OFFSET_V1 0
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1 AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
+#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB_V1
+#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 \
+ (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB_V1 + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB_V1)
+#define AMD_SRIOV_MSG_INIT_DATA_TOT_SIZE_KB_V1 \
+ (AMD_SRIOV_MSG_VBIOS_SIZE_KB_V1 + AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB_V1 + \
+ AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1)
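+
+/*
+ * Expanding the v1 macros above reproduces the layout diagram: VBIOS at
+ * 0 KB, PF2VF at 64 KB, VF2PF at 65 KB, bad page block at 66 KB, RAS
+ * telemetry at 68 KB, for a total init data size of
+ * 64 + (1 + 1 + 2) + 64 = 132 KB.
+ */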
+
+enum amd_sriov_crit_region_version {
+ GPU_CRIT_REGION_V1 = 1,
+ GPU_CRIT_REGION_V2 = 2,
+};
+
+/* v2 layout offset enum (in order of allocation) */
+enum amd_sriov_msg_table_id_enum {
+ AMD_SRIOV_MSG_IPD_TABLE_ID = 0,
+ AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID,
+ AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID,
+ AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID,
+ AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID,
+ AMD_SRIOV_MSG_INITD_H_TABLE_ID,
+ AMD_SRIOV_MSG_MAX_TABLE_ID,
+};
+
+struct amd_sriov_msg_init_data_header {
+ char signature[4]; /* "INDA" */
+ uint32_t version;
+ uint32_t checksum;
+ uint32_t initdata_offset; /* 0 */
+ uint32_t initdata_size_in_kb; /* 5MB */
+ uint32_t valid_tables;
+ uint32_t vbios_img_offset;
+ uint32_t vbios_img_size_in_kb;
+ uint32_t dataexchange_offset;
+ uint32_t dataexchange_size_in_kb;
+ uint32_t ras_tele_info_offset;
+ uint32_t ras_tele_info_size_in_kb;
+ uint32_t ip_discovery_offset;
+ uint32_t ip_discovery_size_in_kb;
+ uint32_t bad_page_info_offset;
+ uint32_t bad_page_size_in_kb;
+ uint32_t reserved[8];
+};
+
+/*
+ * PF2VF history log:
+ * v1 defined in amdgim
+ * v2 current
+ *
+ * VF2PF history log:
+ * v1 defined in amdgim
+ * v2 defined in amdgim
+ * v3 current
+ */
+#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
+#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3
+
+#define AMD_SRIOV_MSG_RESERVE_UCODE 24
+
+#define AMD_SRIOV_MSG_RESERVE_VCN_INST 4
+
+enum amd_sriov_ucode_engine_id {
+ AMD_SRIOV_UCODE_ID_VCE = 0,
+ AMD_SRIOV_UCODE_ID_UVD,
+ AMD_SRIOV_UCODE_ID_MC,
+ AMD_SRIOV_UCODE_ID_ME,
+ AMD_SRIOV_UCODE_ID_PFP,
+ AMD_SRIOV_UCODE_ID_CE,
+ AMD_SRIOV_UCODE_ID_RLC,
+ AMD_SRIOV_UCODE_ID_RLC_SRLC,
+ AMD_SRIOV_UCODE_ID_RLC_SRLG,
+ AMD_SRIOV_UCODE_ID_RLC_SRLS,
+ AMD_SRIOV_UCODE_ID_MEC,
+ AMD_SRIOV_UCODE_ID_MEC2,
+ AMD_SRIOV_UCODE_ID_SOS,
+ AMD_SRIOV_UCODE_ID_ASD,
+ AMD_SRIOV_UCODE_ID_TA_RAS,
+ AMD_SRIOV_UCODE_ID_TA_XGMI,
+ AMD_SRIOV_UCODE_ID_SMC,
+ AMD_SRIOV_UCODE_ID_SDMA,
+ AMD_SRIOV_UCODE_ID_SDMA2,
+ AMD_SRIOV_UCODE_ID_VCN,
+ AMD_SRIOV_UCODE_ID_DMCU,
+ AMD_SRIOV_UCODE_ID__MAX
+};
+
+#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed
+
+union amd_sriov_msg_feature_flags {
+ struct {
+ uint32_t error_log_collect : 1;
+ uint32_t host_load_ucodes : 1;
+ uint32_t host_flr_vramlost : 1;
+ uint32_t mm_bw_management : 1;
+ uint32_t pp_one_vf_mode : 1;
+ uint32_t reg_indirect_acc : 1;
+ uint32_t av1_support : 1;
+ uint32_t vcn_rb_decouple : 1;
+ uint32_t mes_info_dump_enable : 1;
+ uint32_t ras_caps : 1;
+ uint32_t ras_telemetry : 1;
+ uint32_t ras_cper : 1;
+ uint32_t xgmi_ta_ext_peer_link : 1;
+ uint32_t reserved : 19;
+ } flags;
+ uint32_t all;
+};
+
+union amd_sriov_reg_access_flags {
+ struct {
+ uint32_t vf_reg_access_ih : 1;
+ uint32_t vf_reg_access_mmhub : 1;
+ uint32_t vf_reg_access_gc : 1;
+ uint32_t vf_reg_access_l1_tlb_cntl : 1;
+ uint32_t vf_reg_access_sq_config : 1;
+ uint32_t reserved : 27;
+ } flags;
+ uint32_t all;
+};
+
+union amd_sriov_ras_caps {
+ struct {
+ uint64_t block_umc : 1;
+ uint64_t block_sdma : 1;
+ uint64_t block_gfx : 1;
+ uint64_t block_mmhub : 1;
+ uint64_t block_athub : 1;
+ uint64_t block_pcie_bif : 1;
+ uint64_t block_hdp : 1;
+ uint64_t block_xgmi_wafl : 1;
+ uint64_t block_df : 1;
+ uint64_t block_smn : 1;
+ uint64_t block_sem : 1;
+ uint64_t block_mp0 : 1;
+ uint64_t block_mp1 : 1;
+ uint64_t block_fuse : 1;
+ uint64_t block_mca : 1;
+ uint64_t block_vcn : 1;
+ uint64_t block_jpeg : 1;
+ uint64_t block_ih : 1;
+ uint64_t block_mpio : 1;
+ uint64_t block_mmsch : 1;
+ uint64_t poison_propogation_mode : 1;
+ uint64_t reserved : 43;
+ } bits;
+ uint64_t all;
+};
+
+union amd_sriov_msg_os_info {
+ struct {
+ uint32_t windows : 1;
+ uint32_t reserved : 31;
+ } info;
+ uint32_t all;
+};
+
+struct amd_sriov_msg_uuid_info {
+ union {
+ struct {
+ uint32_t did : 16;
+ uint32_t fcn : 8;
+ uint32_t asic_7 : 8;
+ };
+ uint32_t time_low;
+ };
+
+ struct {
+ uint32_t time_mid : 16;
+ uint32_t time_high : 12;
+ uint32_t version : 4;
+ };
+
+ struct {
+ struct {
+ uint8_t clk_seq_hi : 6;
+ uint8_t variant : 2;
+ };
+ union {
+ uint8_t clk_seq_low;
+ uint8_t asic_6;
+ };
+ uint16_t asic_4;
+ };
+
+ uint32_t asic_0;
+};
+
+struct amd_sriov_msg_pf2vf_info_header {
+ /* the total structure size in byte */
+ uint32_t size;
+ /* version of this structure, written by the HOST */
+ uint32_t version;
+ /* reserved */
+ uint32_t reserved[2];
+};
+
+#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
+struct amd_sriov_msg_pf2vf_info {
+ /* header contains size and version */
+ struct amd_sriov_msg_pf2vf_info_header header;
+ /* use private key from mailbox 2 to create checksum */
+ uint32_t checksum;
+ /* The features flags of the HOST driver supports */
+ union amd_sriov_msg_feature_flags feature_flags;
+ /* (max_width * max_height * fps) / (16 * 16) */
+ uint32_t hevc_enc_max_mb_per_second;
+ /* (max_width * max_height) / (16 * 16) */
+ uint32_t hevc_enc_max_mb_per_frame;
+ /* (max_width * max_height * fps) / (16 * 16) */
+ uint32_t avc_enc_max_mb_per_second;
+ /* (max_width * max_height) / (16 * 16) */
+ uint32_t avc_enc_max_mb_per_frame;
+ /* MEC FW position in BYTE from the start of VF visible frame buffer */
+ uint64_t mecfw_offset;
+ /* MEC FW size in BYTE */
+ uint32_t mecfw_size;
+ /* UVD FW position in BYTE from the start of VF visible frame buffer */
+ uint64_t uvdfw_offset;
+ /* UVD FW size in BYTE */
+ uint32_t uvdfw_size;
+ /* VCE FW position in BYTE from the start of VF visible frame buffer */
+ uint64_t vcefw_offset;
+ /* VCE FW size in BYTE */
+ uint32_t vcefw_size;
+ /* Bad pages block position in BYTE */
+ uint32_t bp_block_offset_low;
+ uint32_t bp_block_offset_high;
+ /* Bad pages block size in BYTE */
+ uint32_t bp_block_size;
+ /* frequency for VF to update the VF2PF area in msec, 0 = manual */
+ uint32_t vf2pf_update_interval_ms;
+ /* identification in ROCm SMI */
+ uint64_t uuid;
+ uint32_t fcn_idx;
+ /* flags to indicate which register access method VF should use */
+ union amd_sriov_reg_access_flags reg_access_flags;
+ /* MM BW management */
+ struct {
+ uint32_t decode_max_dimension_pixels;
+ uint32_t decode_max_frame_pixels;
+ uint32_t encode_max_dimension_pixels;
+ uint32_t encode_max_frame_pixels;
+ } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
+ /* UUID info */
+ struct amd_sriov_msg_uuid_info uuid_info;
+ /* PCIE atomic ops support flag */
+ uint32_t pcie_atomic_ops_support_flags;
+ /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
+ uint32_t gpu_capacity;
+ /* vf bdf on host pci tree for debug only */
+ uint32_t bdf_on_host;
+ uint32_t more_bp; /* reserved for future use */
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+
+ /* reserved */
+ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
+} __packed;
+
+struct amd_sriov_msg_vf2pf_info_header {
+ /* the total structure size in byte */
+ uint32_t size;
+ /* version of this structure, written by the guest */
+ uint32_t version;
+ /* reserved */
+ uint32_t reserved[2];
+};
+
+#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73)
+struct amd_sriov_msg_vf2pf_info {
+ /* header contains size and version */
+ struct amd_sriov_msg_vf2pf_info_header header;
+ uint32_t checksum;
+ /* driver version */
+ uint8_t driver_version[64];
+ /* driver certification, 1=WHQL, 0=None */
+ uint32_t driver_cert;
+ /* guest OS type and version */
+ union amd_sriov_msg_os_info os_info;
+ /* guest fb information in the unit of MB */
+ uint32_t fb_usage;
+ /* guest gfx engine usage percentage */
+ uint32_t gfx_usage;
+ /* guest gfx engine health percentage */
+ uint32_t gfx_health;
+ /* guest compute engine usage percentage */
+ uint32_t compute_usage;
+ /* guest compute engine health percentage */
+ uint32_t compute_health;
+ /* guest avc engine usage percentage. 0xffff means N/A */
+ uint32_t avc_enc_usage;
+ /* guest avc engine health percentage. 0xffff means N/A */
+ uint32_t avc_enc_health;
+ /* guest hevc engine usage percentage. 0xffff means N/A */
+ uint32_t hevc_enc_usage;
+ /* guest hevc engine health percentage. 0xffff means N/A */
+ uint32_t hevc_enc_health;
+ /* combined encode/decode usage */
+ uint32_t encode_usage;
+ uint32_t decode_usage;
+ /* Version of PF2VF that VF understands */
+ uint32_t pf2vf_version_required;
+ /* additional FB usage */
+ uint32_t fb_vis_usage;
+ uint32_t fb_vis_size;
+ uint32_t fb_size;
+ /* guest ucode data, each one is 1.25 Dword */
+ struct {
+ uint8_t id;
+ uint32_t version;
+ } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
+ uint64_t dummy_page_addr;
+ /* FB allocated for guest MES to record UQ info */
+ uint64_t mes_info_addr;
+ uint32_t mes_info_size;
+ /* reserved */
+ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
+} __packed;
+
+/* mailbox messages sent from the guest to the host */
+enum amd_sriov_mailbox_request_message {
+ MB_REQ_MSG_REQ_GPU_INIT_ACCESS = 1,
+ MB_REQ_MSG_REL_GPU_INIT_ACCESS,
+ MB_REQ_MSG_REQ_GPU_FINI_ACCESS,
+ MB_REQ_MSG_REL_GPU_FINI_ACCESS,
+ MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
+ MB_REQ_MSG_REQ_GPU_INIT_DATA,
+ MB_REQ_MSG_PSP_VF_CMD_RELAY,
+
+ MB_REQ_MSG_LOG_VF_ERROR = 200,
+ MB_REQ_MSG_READY_TO_RESET = 201,
+ MB_REQ_MSG_RAS_POISON = 202,
+ MB_REQ_RAS_ERROR_COUNT = 203,
+ MB_REQ_RAS_CPER_DUMP = 204,
+ MB_REQ_RAS_BAD_PAGES = 205,
+};
+
+/* mailbox messages sent from the host to the guest */
+enum amd_sriov_mailbox_response_message {
+ MB_RES_MSG_CLR_MSG_BUF = 0,
+ MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
+ MB_RES_MSG_FLR_NOTIFICATION = 2,
+ MB_RES_MSG_FLR_NOTIFICATION_COMPLETION = 3,
+ MB_RES_MSG_SUCCESS = 4,
+ MB_RES_MSG_FAIL = 5,
+ MB_RES_MSG_QUERY_ALIVE = 6,
+ MB_RES_MSG_GPU_INIT_DATA_READY = 7,
+ MB_RES_MSG_RAS_POISON_READY = 8,
+ MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION = 9,
+ MB_RES_MSG_GPU_RMA = 10,
+ MB_RES_MSG_RAS_ERROR_COUNT_READY = 11,
+ MB_REQ_RAS_CPER_DUMP_READY = 14,
+ MB_RES_MSG_RAS_BAD_PAGES_READY = 15,
+ MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16,
+ MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17,
+ MB_RES_MSG_TEXT_MESSAGE = 255
+};
+
+enum amd_sriov_ras_telemetry_gpu_block {
+ RAS_TELEMETRY_GPU_BLOCK_UMC = 0,
+ RAS_TELEMETRY_GPU_BLOCK_SDMA = 1,
+ RAS_TELEMETRY_GPU_BLOCK_GFX = 2,
+ RAS_TELEMETRY_GPU_BLOCK_MMHUB = 3,
+ RAS_TELEMETRY_GPU_BLOCK_ATHUB = 4,
+ RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF = 5,
+ RAS_TELEMETRY_GPU_BLOCK_HDP = 6,
+ RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL = 7,
+ RAS_TELEMETRY_GPU_BLOCK_DF = 8,
+ RAS_TELEMETRY_GPU_BLOCK_SMN = 9,
+ RAS_TELEMETRY_GPU_BLOCK_SEM = 10,
+ RAS_TELEMETRY_GPU_BLOCK_MP0 = 11,
+ RAS_TELEMETRY_GPU_BLOCK_MP1 = 12,
+ RAS_TELEMETRY_GPU_BLOCK_FUSE = 13,
+ RAS_TELEMETRY_GPU_BLOCK_MCA = 14,
+ RAS_TELEMETRY_GPU_BLOCK_VCN = 15,
+ RAS_TELEMETRY_GPU_BLOCK_JPEG = 16,
+ RAS_TELEMETRY_GPU_BLOCK_IH = 17,
+ RAS_TELEMETRY_GPU_BLOCK_MPIO = 18,
+ RAS_TELEMETRY_GPU_BLOCK_COUNT = 19,
+};
+
+struct amd_sriov_ras_telemetry_header {
+ uint32_t checksum;
+ uint32_t used_size;
+ uint32_t reserved[2];
+};
+
+struct amd_sriov_ras_telemetry_error_count {
+ struct {
+ uint32_t ce_count;
+ uint32_t ue_count;
+ uint32_t de_count;
+ uint32_t ce_overflow_count;
+ uint32_t ue_overflow_count;
+ uint32_t de_overflow_count;
+ uint32_t reserved[6];
+ } block[RAS_TELEMETRY_GPU_BLOCK_COUNT];
+};
+
+struct amd_sriov_ras_cper_dump {
+ uint32_t more;
+ uint64_t overflow_count;
+ uint64_t count;
+ uint64_t wptr;
+ uint32_t buf[];
+};
+
+struct amd_sriov_ras_chk_criti {
+ uint32_t hit;
+};
+
+struct amdsriov_ras_telemetry {
+ struct amd_sriov_ras_telemetry_header header;
+
+ union {
+ struct amd_sriov_ras_telemetry_error_count error_count;
+ struct amd_sriov_ras_cper_dump cper_dump;
+ struct amd_sriov_ras_chk_criti chk_criti;
+ } body;
+};
+
+/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
+enum amd_sriov_gpu_init_data_version {
+ GPU_INIT_DATA_READY_V1 = 1,
+};
+
+#pragma pack(pop) // Restore previous packing option
+
+/* checksum function between host and guest */
+unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key,
+ unsigned int checksum);
+
+/* assertions at compile time */
+#ifdef __linux__
+#define stringification(s) _stringification(s)
+#define _stringification(s) #s
+
+_Static_assert(
+ sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
+ "amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
+
+_Static_assert(
+ sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
+ "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
+
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
+
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
+
+#undef _stringification
+#undef stringification
+#endif
+
+#endif /* AMDGV_SRIOV_MSG__H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
new file mode 100644
index 000000000000..f9e2edf5260b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -0,0 +1,986 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "amdgpu_reg_state.h"
+#include "amdgpu_xcp.h"
+#include "gfx_v9_4_3.h"
+#include "gfxhub_v1_2.h"
+#include "sdma_v4_4_2.h"
+#include "amdgpu_ip.h"
+
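+/* Mask of num_inst contiguous instance bits assigned to partition xcp_id (0 if num_inst is 0) */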
+#define XCP_INST_MASK(num_inst, xcp_id) \
+ (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0)
+
+void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->doorbell_index.kiq = AMDGPU_DOORBELL_LAYOUT1_KIQ_START;
+
+ adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START;
+
+ adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END;
+ adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE;
+
+ adev->doorbell_index.sdma_doorbell_range = 20;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->doorbell_index.sdma_engine[i] =
+ AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START +
+ i * (adev->doorbell_index.sdma_doorbell_range >> 1);
+
+ adev->doorbell_index.ih = AMDGPU_DOORBELL_LAYOUT1_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL_LAYOUT1_VCN_START;
+
+ adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
+}
+
+/* Fixed pattern for SMN addressing across different AIDs:
+ * bit[34]: indicates a cross-AID access
+ * bit[33:32]: target AID id
+ * The AID id range is 0 ~ 3, as the maximum number of AIDs is 4.
+ */
+u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id)
+{
+ u64 ext_offset;
+
+ /* local routing and bit[34:32] will be zeros */
+ if (ext_id == 0)
+ return 0;
+
+ /* Initiated from the host, accesses to any non-zero AID are cross-AID traffic */
+ ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34);
+
+ return ext_offset;
+}
+
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xcc_per_xcp = 0, mode = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+ if (adev->gfx.funcs->get_xccs_per_xcp)
+ num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
+ if ((num_xcc_per_xcp) && (num_xcc % num_xcc_per_xcp == 0))
+ mode = num_xcc / num_xcc_per_xcp;
+
+ if (num_xcc_per_xcp == 1)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ switch (mode) {
+ case 1:
+ return AMDGPU_SPX_PARTITION_MODE;
+ case 2:
+ return AMDGPU_DPX_PARTITION_MODE;
+ case 3:
+ return AMDGPU_TPX_PARTITION_MODE;
+ case 4:
+ return AMDGPU_QPX_PARTITION_MODE;
+ default:
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ }
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ enum amdgpu_gfx_partition derv_mode,
+ mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ derv_mode = __aqua_vanjaram_calc_xcp_mode(xcp_mgr);
+
+ if (amdgpu_sriov_vf(adev))
+ return derv_mode;
+
+ if (adev->nbio.funcs->get_compute_partition_mode) {
+ mode = adev->nbio.funcs->get_compute_partition_mode(adev);
+ if (mode != derv_mode) {
+ dev_warn(
+ adev->dev,
+ "Mismatch in compute partition mode - reported : %d derived : %d",
+ mode, derv_mode);
+ if (derv_mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+ amdgpu_device_bus_status_check(adev);
+ }
+ }
+
+ return mode;
+}
+
+static int __aqua_vanjaram_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode)
+{
+ int num_xcc, num_xcc_per_xcp = 0;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc;
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 2;
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 3;
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xcc_per_xcp = num_xcc / 4;
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_xcc_per_xcp = 1;
+ break;
+ }
+
+ return num_xcc_per_xcp;
+}
+
+static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_sdma, num_vcn, num_shared_vcn, num_xcp;
+ int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp;
+
+ num_sdma = adev->sdma.num_instances;
+ num_vcn = adev->vcn.num_vcn_inst;
+ num_shared_vcn = 1;
+
+ num_xcc_xcp = adev->gfx.num_xcc_per_xcp;
+ num_xcp = NUM_XCC(adev->gfx.xcc_mask) / num_xcc_xcp;
+
+ switch (xcp_mgr->mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ case AMDGPU_DPX_PARTITION_MODE:
+ case AMDGPU_TPX_PARTITION_MODE:
+ case AMDGPU_QPX_PARTITION_MODE:
+ case AMDGPU_CPX_PARTITION_MODE:
+ num_sdma_xcp = DIV_ROUND_UP(num_sdma, num_xcp);
+ num_vcn_xcp = DIV_ROUND_UP(num_vcn, num_xcp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
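+ /* With more partitions than VCN instances, each VCN instance is shared by num_xcp / num_vcn partitions */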
+ if (num_vcn && num_xcp > num_vcn)
+ num_shared_vcn = num_xcp / num_vcn;
+
+ switch (ip_id) {
+ case AMDGPU_XCP_GFXHUB:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfxhub_v1_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_GFX:
+ ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id);
+ ip->ip_funcs = &gfx_v9_4_3_xcp_funcs;
+ break;
+ case AMDGPU_XCP_SDMA:
+ ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id);
+ ip->ip_funcs = &sdma_v4_4_2_xcp_funcs;
+ break;
+ case AMDGPU_XCP_VCN:
+ ip->inst_mask =
+ XCP_INST_MASK(num_vcn_xcp, xcp_id / num_shared_vcn);
+ /* TODO : Assign IP funcs */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ip->ip_id = ip_id;
+
+ return 0;
+}
+
+static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int px_mode, int *num_xcp,
+ uint16_t *nps_modes)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+ if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode)))
+ return -EINVAL;
+
+ switch (px_mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ *num_xcp = 1;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE);
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ *num_xcp = 2;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ *num_xcp = 3;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ *num_xcp = 4;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ if (gc_ver == IP_VERSION(9, 5, 0))
+ *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ *num_xcp = NUM_XCC(adev->gfx.xcc_mask);
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ if (gc_ver == IP_VERSION(9, 5, 0))
+ *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int max_res[AMDGPU_XCP_RES_MAX] = {};
+ bool res_lt_xcp;
+ int num_xcp, i, r;
+ u16 nps_modes;
+
+ if (!(xcp_mgr->supp_xcp_modes & BIT(mode)))
+ return -EINVAL;
+
+ max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask);
+ max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances;
+ max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst;
+ max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst;
+
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, &nps_modes);
+ if (r)
+ return r;
+
+ xcp_cfg->compatible_nps_modes =
+ (adev->gmc.supported_nps_modes & nps_modes);
+ xcp_cfg->num_res = ARRAY_SIZE(max_res);
+
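+ /* A resource type with fewer instances than partitions has each instance shared by several partitions */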
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ res_lt_xcp = max_res[i] < num_xcp;
+ xcp_cfg->xcp_res[i].id = i;
+ xcp_cfg->xcp_res[i].num_inst =
+ res_lt_xcp ? 1 : max_res[i] / num_xcp;
+ xcp_cfg->xcp_res[i].num_inst =
+ i == AMDGPU_XCP_RES_JPEG ?
+ xcp_cfg->xcp_res[i].num_inst *
+ adev->jpeg.num_jpeg_rings : xcp_cfg->xcp_res[i].num_inst;
+ xcp_cfg->xcp_res[i].num_shared =
+ res_lt_xcp ? num_xcp / max_res[i] : 1;
+ }
+
+ return 0;
+}
+
+static enum amdgpu_gfx_partition
+__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc;
+
+ num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask);
+
+ if (adev->gmc.num_mem_partitions == 1)
+ return AMDGPU_SPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc)
+ return AMDGPU_CPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == num_xcc / 2)
+ return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
+ AMDGPU_CPX_PARTITION_MODE;
+
+ if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
+ return AMDGPU_DPX_PARTITION_MODE;
+
+ return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE;
+}
+
+static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ enum amdgpu_gfx_partition mode)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int num_xcc, num_xccs_per_xcp, r;
+ int num_xcp, nps_mode;
+ u16 supp_nps_modes;
+ bool comp_mode;
+
+ nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp,
+ &supp_nps_modes);
+ if (r)
+ return false;
+
+ comp_mode = !!(BIT(nps_mode) & supp_nps_modes);
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ switch (mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ return comp_mode && num_xcc > 0;
+ case AMDGPU_DPX_PARTITION_MODE:
+ return comp_mode && (num_xcc % 4) == 0;
+ case AMDGPU_TPX_PARTITION_MODE:
+ return comp_mode && ((num_xcc % 3) == 0);
+ case AMDGPU_QPX_PARTITION_MODE:
+ num_xccs_per_xcp = num_xcc / 4;
+ return comp_mode && (num_xccs_per_xcp >= 2);
+ case AMDGPU_CPX_PARTITION_MODE:
+ return comp_mode && (num_xcc > 1);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static void __aqua_vanjaram_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ int mode;
+
+ xcp_mgr->avail_xcp_modes = 0;
+
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ if (__aqua_vanjaram_is_valid_mode(xcp_mgr, mode))
+ xcp_mgr->avail_xcp_modes |= BIT(mode);
+ }
+}
+
+static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode, int *num_xcps)
+{
+ int num_xcc_per_xcp, num_xcc, ret;
+ struct amdgpu_device *adev;
+ u32 flags = 0;
+
+ adev = xcp_mgr->adev;
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+ if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
+ mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
+ if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) {
+ dev_err(adev->dev,
+ "Invalid config, no compatible compute partition mode found, available memory partitions: %d",
+ adev->gmc.num_mem_partitions);
+ return -EINVAL;
+ }
+ } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
+ dev_err(adev->dev,
+ "Invalid compute partition mode requested, requested: %s, available memory partitions: %d",
+ amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions);
+ return -EINVAL;
+ }
+
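+ /* Lock out KFD during the switch unless the device is already in reset or suspend */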
+ if (adev->kfd.init_complete && !amdgpu_in_reset(adev) &&
+ !adev->in_suspend)
+ flags |= AMDGPU_XCP_OPS_KFD;
+
+ if (flags & AMDGPU_XCP_OPS_KFD) {
+ ret = amdgpu_amdkfd_check_and_lock_kfd(adev);
+ if (ret)
+ goto out;
+ }
+
+ ret = amdgpu_xcp_pre_partition_switch(xcp_mgr, flags);
+ if (ret)
+ goto unlock;
+
+ num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode);
+ if (adev->gfx.funcs->switch_partition_mode)
+ adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev,
+ num_xcc_per_xcp);
+
+ /* Init info about new xcps */
+ *num_xcps = num_xcc / num_xcc_per_xcp;
+ amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
+
+ ret = amdgpu_xcp_post_partition_switch(xcp_mgr, flags);
+ if (!ret)
+ __aqua_vanjaram_update_available_partition_mode(xcp_mgr);
+unlock:
+ if (flags & AMDGPU_XCP_OPS_KFD)
+ amdgpu_amdkfd_unlock_kfd(adev);
+out:
+ return ret;
+}
+
+static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev,
+ int xcc_id, uint8_t *mem_id)
+{
+ /* memory/spatial modes validation check is already done */
+ *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp;
+ *mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition;
+
+ return 0;
+}
+
+static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr,
+ struct amdgpu_xcp *xcp, uint8_t *mem_id)
+{
+ struct amdgpu_numa_info numa_info;
+ struct amdgpu_device *adev;
+ uint32_t xcc_mask;
+ int r, i, xcc_id;
+
+ adev = xcp_mgr->adev;
+ /* TODO: BIOS is not returning the right info now
+ * Check on this later
+ */
+ /*
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ */
+ if (adev->gmc.num_mem_partitions == 1) {
+ /* Only one range */
+ *mem_id = 0;
+ return 0;
+ }
+
+ r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask);
+ if (r || !xcc_mask)
+ return -EINVAL;
+
+ xcc_id = ffs(xcc_mask) - 1;
+ if (!adev->gmc.is_app_apu)
+ return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id);
+
+ r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+
+ if (r)
+ return r;
+
+ r = -EINVAL;
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) {
+ *mem_id = i;
+ r = 0;
+ break;
+ }
+ }
+
+ return r;
+}
+
+static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id,
+ enum AMDGPU_XCP_IP_BLOCK ip_id,
+ struct amdgpu_xcp_ip *ip)
+{
+ if (!ip)
+ return -EINVAL;
+
+ return __aqua_vanjaram_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip);
+}
+
+struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
+ .switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
+ .query_partition_mode = &aqua_vanjaram_query_partition_mode,
+ .get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
+ .get_xcp_res_info = &aqua_vanjaram_get_xcp_res_info,
+ .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
+};
+
+static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(adev))
+ aqua_vanjaram_xcp_funcs.switch_partition_mode = NULL;
+
+ ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1,
+ &aqua_vanjaram_xcp_funcs);
+ if (ret)
+ return ret;
+
+ amdgpu_xcp_update_supported_modes(adev->xcp_mgr);
+ /* TODO: Default memory node affinity init */
+
+ return ret;
+}
+
+int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
+{
+ u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask;
+ int ret, i;
+
+ /* generally 1 AID supports 4 instances */
+ adev->sdma.num_inst_per_aid = 4;
+ adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask);
+
+ adev->aid_mask = i = 1;
+ inst_mask >>= adev->sdma.num_inst_per_aid;
+
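+ /* Mark an AID as present when all of its SDMA instances, or a full half of them, are available */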
+ for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
+ inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
+ avail_inst = inst_mask & mask;
+ if (avail_inst == mask || avail_inst == 0x3 ||
+ avail_inst == 0xc)
+ adev->aid_mask |= (1 << i);
+ }
+
+ /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be
+ * addressed based on logical instance ids.
+ */
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_inst_per_aid = 1;
+ adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask);
+ adev->jpeg.harvest_config = 0;
+ adev->jpeg.num_inst_per_aid = 1;
+ adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask);
+
+ ret = aqua_vanjaram_xcp_mgr_init(adev);
+ if (ret)
+ return ret;
+
+ amdgpu_ip_map_init(adev);
+
+ return 0;
+}
+
+static void aqua_read_smn(struct amdgpu_device *adev,
+ struct amdgpu_smn_reg_data *regdata,
+ uint64_t smn_addr)
+{
+ regdata->addr = smn_addr;
+ regdata->value = RREG32_PCIE(smn_addr);
+}
+
+struct aqua_reg_list {
+ uint64_t start_addr;
+ uint32_t num_regs;
+ uint32_t incrx;
+};
+
+#define DW_ADDR_INCR 4
+
+static void aqua_read_smn_ext(struct amdgpu_device *adev,
+ struct amdgpu_smn_reg_data *regdata,
+ uint64_t smn_addr, int i)
+{
+ regdata->addr =
+ smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i);
+ regdata->value = RREG32_PCIE_EXT(regdata->addr);
+}
+
+#define smnreg_0x1A340218 0x1A340218
+#define smnreg_0x1A3402E4 0x1A3402E4
+#define smnreg_0x1A340294 0x1A340294
+#define smreg_0x1A380088 0x1A380088
+
+#define NUM_PCIE_SMN_REGS 14
+
+static struct aqua_reg_list pcie_reg_addrs[] = {
+ { smnreg_0x1A340218, 1, 0 },
+ { smnreg_0x1A3402E4, 1, 0 },
+ { smnreg_0x1A340294, 6, DW_ADDR_INCR },
+ { smreg_0x1A380088, 6, DW_ADDR_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_pcie_v1_0 *pcie_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ struct pci_dev *us_pdev, *ds_pdev;
+ int aer_cap, r, n;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf;
+
+ szbuf = sizeof(*pcie_reg_state) +
+ amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS);
+ /* Only one instance of pcie regs */
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf +
+ sizeof(*pcie_reg_state));
+ pcie_regs->inst_header.instance = 0;
+ pcie_regs->inst_header.state = AMDGPU_INST_S_OK;
+ pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS;
+
+ reg_data = pcie_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) {
+ start_addr = pcie_reg_addrs[r].start_addr;
+ incrx = pcie_reg_addrs[r].incrx;
+ num_regs = pcie_reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn(adev, reg_data, start_addr + n * incrx);
+ ++reg_data;
+ }
+ }
+
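+ /* Walk up from the GPU to the switch downstream port and then to the upstream port */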
+ ds_pdev = pci_upstream_bridge(adev->pdev);
+ us_pdev = pci_upstream_bridge(ds_pdev);
+
+ pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA,
+ &pcie_regs->device_status);
+ pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA,
+ &pcie_regs->link_status);
+
+ aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR);
+ if (aer_cap) {
+ pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS,
+ &pcie_regs->pcie_corr_err_status);
+ pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS,
+ &pcie_regs->pcie_uncorr_err_status);
+ }
+
+ pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS,
+ &pcie_regs->sub_bus_number_latency);
+
+ pcie_reg_state->common_header.structure_size = szbuf;
+ pcie_reg_state->common_header.format_revision = 1;
+ pcie_reg_state->common_header.content_revision = 0;
+ pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE;
+ pcie_reg_state->common_header.num_instances = 1;
+
+ return pcie_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11A00050 0x11A00050
+#define smnreg_0x11A00180 0x11A00180
+#define smnreg_0x11A00070 0x11A00070
+#define smnreg_0x11A00200 0x11A00200
+#define smnreg_0x11A0020C 0x11A0020C
+#define smnreg_0x11A00210 0x11A00210
+#define smnreg_0x11A00108 0x11A00108
+
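+/* Encode the XGMI link index l into the upper bits of the SMN register address */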
+#define XGMI_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_XGMI_SMN_REGS 25
+
+static struct aqua_reg_list xgmi_reg_addrs[] = {
+ { smnreg_0x11A00050, 1, 0 },
+ { smnreg_0x11A00180, 16, DW_ADDR_INCR },
+ { smnreg_0x11A00070, 4, DW_ADDR_INCR },
+ { smnreg_0x11A00200, 1, 0 },
+ { smnreg_0x11A0020C, 1, 0 },
+ { smnreg_0x11A00210, 1, 0 },
+ { smnreg_0x11A00108, 1, 0 },
+};
+
+static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_xgmi_v1_0 *xgmi_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_xgmi_instances = 8;
+ int inst = 0, i, j, r, n;
+ const int xgmi_inst = 2;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf;
+
+ szbuf = sizeof(*xgmi_reg_state) +
+ amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs),
+ NUM_XGMI_SMN_REGS);
+ /* Buffer must hold up to max_xgmi_instances of XGMI reg sets */
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &xgmi_reg_state->xgmi_state_regs[0];
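+ /* Dump both XGMI links of every available AID, one register-instance block per link */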
+ for_each_inst(i, adev->aid_mask) {
+ for (j = 0; j < xgmi_inst; ++j) {
+ xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p;
+ xgmi_regs->inst_header.instance = inst++;
+
+ xgmi_regs->inst_header.state = AMDGPU_INST_S_OK;
+ xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS;
+
+ reg_data = xgmi_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) {
+ start_addr = xgmi_reg_addrs[r].start_addr;
+ incrx = xgmi_reg_addrs[r].incrx;
+ num_regs = xgmi_reg_addrs[r].num_regs;
+
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(
+ adev, reg_data,
+ XGMI_LINK_REG(start_addr, j) +
+ n * incrx,
+ i);
+ ++reg_data;
+ }
+ }
+ p = reg_data;
+ }
+ }
+
+ xgmi_reg_state->common_header.structure_size = szbuf;
+ xgmi_reg_state->common_header.format_revision = 1;
+ xgmi_reg_state->common_header.content_revision = 0;
+ xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI;
+ xgmi_reg_state->common_header.num_instances = max_xgmi_instances;
+
+ return xgmi_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x11C00070 0x11C00070
+#define smnreg_0x11C00210 0x11C00210
+
+static struct aqua_reg_list wafl_reg_addrs[] = {
+ { smnreg_0x11C00070, 4, DW_ADDR_INCR },
+ { smnreg_0x11C00210, 1, 0 },
+};
+
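+/* Encode the WAFL link index l into the upper bits of the SMN register address */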
+#define WAFL_LINK_REG(smnreg, l) ((smnreg) | (l << 20))
+
+#define NUM_WAFL_SMN_REGS 5
+
+static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size)
+{
+ struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state;
+ uint32_t start_addr, incrx, num_regs, szbuf;
+ struct amdgpu_regs_wafl_v1_0 *wafl_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_wafl_instances = 8;
+ int inst = 0, i, j, r, n;
+ const int wafl_inst = 2;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf;
+
+ szbuf = sizeof(*wafl_reg_state) +
+ amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs),
+ NUM_WAFL_SMN_REGS);
+
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &wafl_reg_state->wafl_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ for (j = 0; j < wafl_inst; ++j) {
+ wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p;
+ wafl_regs->inst_header.instance = inst++;
+
+ wafl_regs->inst_header.state = AMDGPU_INST_S_OK;
+ wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS;
+
+ reg_data = wafl_regs->smn_reg_values;
+
+ for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) {
+ start_addr = wafl_reg_addrs[r].start_addr;
+ incrx = wafl_reg_addrs[r].incrx;
+ num_regs = wafl_reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(
+ adev, reg_data,
+ WAFL_LINK_REG(start_addr, j) +
+ n * incrx,
+ i);
+ ++reg_data;
+ }
+ }
+ p = reg_data;
+ }
+ }
+
+ wafl_reg_state->common_header.structure_size = szbuf;
+ wafl_reg_state->common_header.format_revision = 1;
+ wafl_reg_state->common_header.content_revision = 0;
+ wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL;
+ wafl_reg_state->common_header.num_instances = max_wafl_instances;
+
+ return wafl_reg_state->common_header.structure_size;
+}
+
+#define smnreg_0x1B311060 0x1B311060
+#define smnreg_0x1B411060 0x1B411060
+#define smnreg_0x1B511060 0x1B511060
+#define smnreg_0x1B611060 0x1B611060
+
+#define smnreg_0x1C307120 0x1C307120
+#define smnreg_0x1C317120 0x1C317120
+
+#define smnreg_0x1C320830 0x1C320830
+#define smnreg_0x1C380830 0x1C380830
+#define smnreg_0x1C3D0830 0x1C3D0830
+#define smnreg_0x1C420830 0x1C420830
+
+#define smnreg_0x1C320100 0x1C320100
+#define smnreg_0x1C380100 0x1C380100
+#define smnreg_0x1C3D0100 0x1C3D0100
+#define smnreg_0x1C420100 0x1C420100
+
+#define smnreg_0x1B310500 0x1B310500
+#define smnreg_0x1C300400 0x1C300400
+
+#define USR_CAKE_INCR 0x11000
+#define USR_LINK_INCR 0x100000
+#define USR_CP_INCR 0x10000
+
+#define NUM_USR_SMN_REGS 20
+
+struct aqua_reg_list usr_reg_addrs[] = {
+ { smnreg_0x1B311060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B411060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B511060, 4, DW_ADDR_INCR },
+ { smnreg_0x1B611060, 4, DW_ADDR_INCR },
+ { smnreg_0x1C307120, 2, DW_ADDR_INCR },
+ { smnreg_0x1C317120, 2, DW_ADDR_INCR },
+};
+
+#define NUM_USR1_SMN_REGS 46
+struct aqua_reg_list usr1_reg_addrs[] = {
+ { smnreg_0x1C320830, 6, USR_CAKE_INCR },
+ { smnreg_0x1C380830, 5, USR_CAKE_INCR },
+ { smnreg_0x1C3D0830, 5, USR_CAKE_INCR },
+ { smnreg_0x1C420830, 4, USR_CAKE_INCR },
+ { smnreg_0x1C320100, 6, USR_CAKE_INCR },
+ { smnreg_0x1C380100, 5, USR_CAKE_INCR },
+ { smnreg_0x1C3D0100, 5, USR_CAKE_INCR },
+ { smnreg_0x1C420100, 4, USR_CAKE_INCR },
+ { smnreg_0x1B310500, 4, USR_LINK_INCR },
+ { smnreg_0x1C300400, 2, USR_CP_INCR },
+};
+
+static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev,
+ void *buf, size_t max_size,
+ int reg_state)
+{
+ uint32_t start_addr, incrx, num_regs, szbuf, num_smn;
+ struct amdgpu_reg_state_usr_v1_0 *usr_reg_state;
+ struct amdgpu_regs_usr_v1_0 *usr_regs;
+ struct amdgpu_smn_reg_data *reg_data;
+ const int max_usr_instances = 4;
+ struct aqua_reg_list *reg_addrs;
+ int inst = 0, i, n, r, arr_size;
+ void *p;
+
+ if (!buf || !max_size)
+ return -EINVAL;
+
+ switch (reg_state) {
+ case AMDGPU_REG_STATE_TYPE_USR:
+ arr_size = ARRAY_SIZE(usr_reg_addrs);
+ reg_addrs = usr_reg_addrs;
+ num_smn = NUM_USR_SMN_REGS;
+ break;
+ case AMDGPU_REG_STATE_TYPE_USR_1:
+ arr_size = ARRAY_SIZE(usr1_reg_addrs);
+ reg_addrs = usr1_reg_addrs;
+ num_smn = NUM_USR1_SMN_REGS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf;
+
+ szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances,
+ sizeof(*usr_regs),
+ num_smn);
+ if (max_size < szbuf)
+ return -EOVERFLOW;
+
+ p = &usr_reg_state->usr_state_regs[0];
+ for_each_inst(i, adev->aid_mask) {
+ usr_regs = (struct amdgpu_regs_usr_v1_0 *)p;
+ usr_regs->inst_header.instance = inst++;
+ usr_regs->inst_header.state = AMDGPU_INST_S_OK;
+ usr_regs->inst_header.num_smn_regs = num_smn;
+ reg_data = usr_regs->smn_reg_values;
+
+ for (r = 0; r < arr_size; r++) {
+ start_addr = reg_addrs[r].start_addr;
+ incrx = reg_addrs[r].incrx;
+ num_regs = reg_addrs[r].num_regs;
+ for (n = 0; n < num_regs; n++) {
+ aqua_read_smn_ext(adev, reg_data,
+ start_addr + n * incrx, i);
+ reg_data++;
+ }
+ }
+ p = reg_data;
+ }
+
+ usr_reg_state->common_header.structure_size = szbuf;
+ usr_reg_state->common_header.format_revision = 1;
+ usr_reg_state->common_header.content_revision = 0;
+ usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR;
+ usr_reg_state->common_header.num_instances = max_usr_instances;
+
+ return usr_reg_state->common_header.structure_size;
+}
+
+ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
+ enum amdgpu_reg_state reg_state, void *buf,
+ size_t max_size)
+{
+ ssize_t size;
+
+ switch (reg_state) {
+ case AMDGPU_REG_STATE_TYPE_PCIE:
+ size = aqua_vanjaram_read_pcie_state(adev, buf, max_size);
+ break;
+ case AMDGPU_REG_STATE_TYPE_XGMI:
+ size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size);
+ break;
+ case AMDGPU_REG_STATE_TYPE_WAFL:
+ size = aqua_vanjaram_read_wafl_state(adev, buf, max_size);
+ break;
+ case AMDGPU_REG_STATE_TYPE_USR:
+ size = aqua_vanjaram_read_usr_state(adev, buf, max_size,
+ AMDGPU_REG_STATE_TYPE_USR);
+ break;
+ case AMDGPU_REG_STATE_TYPE_USR_1:
+ size = aqua_vanjaram_read_usr_state(
+ adev, buf, max_size, AMDGPU_REG_STATE_TYPE_USR_1);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return size;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
new file mode 100644
index 000000000000..fda99c958c3b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "arct_ip_offset.h"
+
+int arct_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks; only initialize the blocks needed by our driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i]));
+ adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i]));
+ adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i]));
+ adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i]));
+ adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i]));
+ adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));
+ }
+ return 0;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
new file mode 100644
index 000000000000..42f4e163e251
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "athub_v1_0.h"
+
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "vega10_enum.h"
+
+#include "soc15_common.h"
+
+static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
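+ /* Read-modify-write; only write the register back when the CG enable bit actually changes */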
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(9, 0, 0):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 0):
+ case IP_VERSION(9, 3, 0):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(1, 5, 0):
+ athub_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
new file mode 100644
index 000000000000..6be0a6704ea7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V1_0_H__
+#define __ATHUB_V1_0_H__
+
+int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
new file mode 100644
index 000000000000..5a122f50a6e7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v2_0.h"
+
+#include "athub/athub_2_0_0_offset.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_default.h"
+
+#include "soc15_common.h"
+
+static void
+athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable)
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void
+athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)))
+ return;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable)
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(1, 3, 1):
+ case IP_VERSION(2, 0, 0):
+ case IP_VERSION(2, 0, 2):
+ athub_v2_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v2_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
new file mode 100644
index 000000000000..8b763f6dfd81
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V2_0_H__
+#define __ATHUB_V2_0_H__
+
+int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
new file mode 100644
index 000000000000..e143fcc46148
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v2_1.h"
+
+#include "athub/athub_2_1_0_offset.h"
+#include "athub/athub_2_1_0_sh_mask.h"
+
+#include "soc15_common.h"
+
+static void
+athub_v2_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void
+athub_v2_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(2, 1, 0):
+ case IP_VERSION(2, 1, 1):
+ case IP_VERSION(2, 1, 2):
+ case IP_VERSION(2, 4, 0):
+ athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE);
+ athub_v2_1_update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
new file mode 100644
index 000000000000..b799f14bce03
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V2_1_H__
+#define __ATHUB_V2_1_H__
+
+int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
new file mode 100644
index 000000000000..d1bba9c64e16
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v3_0.h"
+#include "athub/athub_3_0_0_offset.h"
+#include "athub/athub_3_0_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "soc15_common.h"
+
+#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
+#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+#define regATHUB_MISC_CNTL_V3_3_0 0x00d8
+#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0
+
+
+static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
+ break;
+ case IP_VERSION(3, 3, 0):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0);
+ break;
+ default:
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ }
+ return data;
+}
+
+static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 1):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
+ break;
+ case IP_VERSION(3, 3, 0):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data);
+ break;
+ default:
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ }
+}
+
+static void
+athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v3_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ athub_v3_0_set_cg_cntl(adev, data);
+}
+
+static void
+athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v3_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ athub_v3_0_set_cg_cntl(adev, data);
+}
+
+int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
+ case IP_VERSION(3, 0, 2):
+ case IP_VERSION(3, 3, 0):
+ athub_v3_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v3_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = athub_v3_0_get_cg_cntl(adev);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h
new file mode 100644
index 000000000000..e08a7d564365
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V3_0_H__
+#define __ATHUB_V3_0_H__
+
+int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c
new file mode 100644
index 000000000000..8a0773b80864
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v4_1_0.h"
+#include "athub/athub_4_1_0_offset.h"
+#include "athub/athub_4_1_0_sh_mask.h"
+#include "soc15_common.h"
+
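+/*
+ * ATHUB 4.1.0 exposes a single ATHUB_MISC_CNTL register; reads for unknown
+ * IP revisions return 0 and writes are silently dropped.
+ */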
+static uint32_t athub_v4_1_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ default:
+ data = 0;
+ break;
+ }
+ return data;
+}
+
+static void athub_v4_1_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+athub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v4_1_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ athub_v4_1_0_set_cg_cntl(adev, data);
+}
+
+static void
+athub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = athub_v4_1_0_get_cg_cntl(adev);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ athub_v4_1_0_set_cg_cntl(adev, data);
+}
+
+int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) {
+ case IP_VERSION(4, 1, 0):
+ athub_v4_1_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ athub_v4_1_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = athub_v4_1_0_get_cg_cntl(adev);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h
new file mode 100644
index 000000000000..4d18d0998fa8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V4_1_0_H__
+#define __ATHUB_V4_1_0_H__
+
+int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
new file mode 100644
index 000000000000..7a063e44d429
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -0,0 +1,1655 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Stanislaw Skowronek
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string_helpers.h>
+
+#include <linux/unaligned.h>
+
+#include <drm/drm_util.h>
+
+#define ATOM_DEBUG
+
+#include "atomfirmware.h"
+#include "atom.h"
+#include "atom-names.h"
+#include "atom-bits.h"
+#include "amdgpu.h"
+
+#define ATOM_COND_ABOVE 0
+#define ATOM_COND_ABOVEOREQUAL 1
+#define ATOM_COND_ALWAYS 2
+#define ATOM_COND_BELOW 3
+#define ATOM_COND_BELOWOREQUAL 4
+#define ATOM_COND_EQUAL 5
+#define ATOM_COND_NOTEQUAL 6
+
+#define ATOM_PORT_ATI 0
+#define ATOM_PORT_PCI 1
+#define ATOM_PORT_SYSIO 2
+
+#define ATOM_UNIT_MICROSEC 0
+#define ATOM_UNIT_MILLISEC 1
+
+#define PLL_INDEX 2
+#define PLL_DATA 3
+
+#define ATOM_CMD_TIMEOUT_SEC 20
+
+typedef struct {
+ struct atom_context *ctx;
+ uint32_t *ps, *ws;
+ int ps_size, ws_size;
+ int ps_shift;
+ uint16_t start;
+ unsigned last_jump;
+ unsigned long last_jump_jiffies;
+ bool abort;
+} atom_exec_context;
+
+int amdgpu_atom_debug;
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
+
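+/*
+ * Mask and shift tables indexed by the ATOM_SRC_* alignment code: they
+ * select which byte/word lane of a 32-bit value an operand refers to.
+ */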
+static uint32_t atom_arg_mask[8] =
+ { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
+ 0xFF000000 };
+static int atom_arg_shift[8] = { 0, 0, 8, 16, 0, 8, 16, 24 };
+
+static int atom_dst_to_src[8][4] = {
+ /* translate destination alignment field to the source alignment encoding */
+ {0, 0, 0, 0},
+ {1, 2, 3, 0},
+ {1, 2, 3, 0},
+ {1, 2, 3, 0},
+ {4, 5, 6, 7},
+ {4, 5, 6, 7},
+ {4, 5, 6, 7},
+ {4, 5, 6, 7},
+};
+static int atom_def_dst[8] = { 0, 0, 1, 2, 0, 1, 2, 3 };
+
+static int debug_depth;
+#ifdef ATOM_DEBUG
+static void debug_print_spaces(int n)
+{
+ while (n--)
+ printk(" ");
+}
+
+#define DEBUG(...) do if (amdgpu_atom_debug) { printk(KERN_DEBUG __VA_ARGS__); } while (0)
+#define SDEBUG(...) do if (amdgpu_atom_debug) { printk(KERN_DEBUG); debug_print_spaces(debug_depth); printk(__VA_ARGS__); } while (0)
+#else
+#define DEBUG(...) do { } while (0)
+#define SDEBUG(...) do { } while (0)
+#endif
+
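+/*
+ * Interpret one indirect-IO (IIO) program starting at @base: a small
+ * read/modify/write sequence used to access registers behind an index/data
+ * pair. Returns the accumulated value for reads, or 0 on an unknown opcode.
+ */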
+static uint32_t atom_iio_execute(struct atom_context *ctx, int base,
+ uint32_t index, uint32_t data)
+{
+ uint32_t temp = 0xCDCDCDCD;
+
+ while (1)
+ switch (CU8(base)) {
+ case ATOM_IIO_NOP:
+ base++;
+ break;
+ case ATOM_IIO_READ:
+ temp = ctx->card->reg_read(ctx->card, CU16(base + 1));
+ base += 3;
+ break;
+ case ATOM_IIO_WRITE:
+ ctx->card->reg_write(ctx->card, CU16(base + 1), temp);
+ base += 3;
+ break;
+ case ATOM_IIO_CLEAR:
+ temp &= ~((0xFFFFFFFF >> (32 - CU8(base + 1))) << CU8(base + 2));
+ base += 3;
+ break;
+ case ATOM_IIO_SET:
+ temp |= (0xFFFFFFFF >> (32 - CU8(base + 1))) << CU8(base + 2);
+ base += 3;
+ break;
+ case ATOM_IIO_MOVE_INDEX:
+ temp &= ~((0xFFFFFFFF >> (32 - CU8(base + 1))) << CU8(base + 3));
+ temp |= ((index >> CU8(base + 2)) &
+ (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base + 3);
+ base += 4;
+ break;
+ case ATOM_IIO_MOVE_DATA:
+ temp &= ~((0xFFFFFFFF >> (32 - CU8(base + 1))) << CU8(base + 3));
+ temp |= ((data >> CU8(base + 2)) &
+ (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base + 3);
+ base += 4;
+ break;
+ case ATOM_IIO_MOVE_ATTR:
+ temp &= ~((0xFFFFFFFF >> (32 - CU8(base + 1))) << CU8(base + 3));
+ temp |= ((ctx->io_attr >> CU8(base + 2)) &
+ (0xFFFFFFFF >> (32 - CU8(base + 1)))) << CU8(base + 3);
+ base += 4;
+ break;
+ case ATOM_IIO_END:
+ return temp;
+ default:
+ pr_info("Unknown IIO opcode\n");
+ return 0;
+ }
+}
+
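+/*
+ * Decode a source operand according to @attr: fetch it from a register,
+ * parameter space (PS), workspace (WS), data table, frame-buffer scratch,
+ * immediate, PLL or MC space, optionally remember the raw value in @saved,
+ * then mask/shift it down to the requested alignment.
+ */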
+static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr,
+ int *ptr, uint32_t *saved, int print)
+{
+ uint32_t idx, val = 0xCDCDCDCD, align, arg;
+ struct atom_context *gctx = ctx->ctx;
+ arg = attr & 7;
+ align = (attr >> 3) & 7;
+ switch (arg) {
+ case ATOM_ARG_REG:
+ idx = U16(*ptr);
+ (*ptr) += 2;
+ if (print)
+ DEBUG("REG[0x%04X]", idx);
+ idx += gctx->reg_block;
+ switch (gctx->io_mode) {
+ case ATOM_IO_MM:
+ val = gctx->card->reg_read(gctx->card, idx);
+ break;
+ case ATOM_IO_PCI:
+ pr_info("PCI registers are not implemented\n");
+ return 0;
+ case ATOM_IO_SYSIO:
+ pr_info("SYSIO registers are not implemented\n");
+ return 0;
+ default:
+ if (!(gctx->io_mode & 0x80)) {
+ pr_info("Bad IO mode\n");
+ return 0;
+ }
+ if (!gctx->iio[gctx->io_mode & 0x7F]) {
+ pr_info("Undefined indirect IO read method %d\n",
+ gctx->io_mode & 0x7F);
+ return 0;
+ }
+ val =
+ atom_iio_execute(gctx,
+ gctx->iio[gctx->io_mode & 0x7F],
+ idx, 0);
+ }
+ break;
+ case ATOM_ARG_PS:
+ idx = U8(*ptr);
+ (*ptr)++;
+ /* get_unaligned_le32 avoids unaligned accesses from atombios
+ * tables, noticed on a DEC Alpha. */
+ if (idx < ctx->ps_size)
+ val = get_unaligned_le32((u32 *)&ctx->ps[idx]);
+ else
+ pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
+ if (print)
+ DEBUG("PS[0x%02X,0x%04X]", idx, val);
+ break;
+ case ATOM_ARG_WS:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if (print)
+ DEBUG("WS[0x%02X]", idx);
+ switch (idx) {
+ case ATOM_WS_QUOTIENT:
+ val = gctx->divmul[0];
+ break;
+ case ATOM_WS_REMAINDER:
+ val = gctx->divmul[1];
+ break;
+ case ATOM_WS_DATAPTR:
+ val = gctx->data_block;
+ break;
+ case ATOM_WS_SHIFT:
+ val = gctx->shift;
+ break;
+ case ATOM_WS_OR_MASK:
+ val = 1 << gctx->shift;
+ break;
+ case ATOM_WS_AND_MASK:
+ val = ~(1 << gctx->shift);
+ break;
+ case ATOM_WS_FB_WINDOW:
+ val = gctx->fb_base;
+ break;
+ case ATOM_WS_ATTRIBUTES:
+ val = gctx->io_attr;
+ break;
+ case ATOM_WS_REGPTR:
+ val = gctx->reg_block;
+ break;
+ default:
+ if (idx < ctx->ws_size)
+ val = ctx->ws[idx];
+ else
+ pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
+ }
+ break;
+ case ATOM_ARG_ID:
+ idx = U16(*ptr);
+ (*ptr) += 2;
+ if (print) {
+ if (gctx->data_block)
+ DEBUG("ID[0x%04X+%04X]", idx, gctx->data_block);
+ else
+ DEBUG("ID[0x%04X]", idx);
+ }
+ val = U32(idx + gctx->data_block);
+ break;
+ case ATOM_ARG_FB:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if ((gctx->fb_base + (idx * 4)) > gctx->scratch_size_bytes) {
+ DRM_ERROR("ATOM: fb read beyond scratch region: %d vs. %d\n",
+ gctx->fb_base + (idx * 4), gctx->scratch_size_bytes);
+ val = 0;
+ } else
+ val = gctx->scratch[(gctx->fb_base / 4) + idx];
+ if (print)
+ DEBUG("FB[0x%02X]", idx);
+ break;
+ case ATOM_ARG_IMM:
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ val = U32(*ptr);
+ (*ptr) += 4;
+ if (print)
+ DEBUG("IMM 0x%08X\n", val);
+ break;
+ case ATOM_SRC_WORD0:
+ case ATOM_SRC_WORD8:
+ case ATOM_SRC_WORD16:
+ val = U16(*ptr);
+ (*ptr) += 2;
+ if (print)
+ DEBUG("IMM 0x%04X\n", val);
+ break;
+ case ATOM_SRC_BYTE0:
+ case ATOM_SRC_BYTE8:
+ case ATOM_SRC_BYTE16:
+ case ATOM_SRC_BYTE24:
+ val = U8(*ptr);
+ (*ptr)++;
+ if (print)
+ DEBUG("IMM 0x%02X\n", val);
+ break;
+ }
+ return val;
+ case ATOM_ARG_PLL:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if (print)
+ DEBUG("PLL[0x%02X]", idx);
+ val = gctx->card->pll_read(gctx->card, idx);
+ break;
+ case ATOM_ARG_MC:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if (print)
+ DEBUG("MC[0x%02X]", idx);
+ val = gctx->card->mc_read(gctx->card, idx);
+ break;
+ }
+ if (saved)
+ *saved = val;
+ val &= atom_arg_mask[align];
+ val >>= atom_arg_shift[align];
+ if (print)
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ DEBUG(".[31:0] -> 0x%08X\n", val);
+ break;
+ case ATOM_SRC_WORD0:
+ DEBUG(".[15:0] -> 0x%04X\n", val);
+ break;
+ case ATOM_SRC_WORD8:
+ DEBUG(".[23:8] -> 0x%04X\n", val);
+ break;
+ case ATOM_SRC_WORD16:
+ DEBUG(".[31:16] -> 0x%04X\n", val);
+ break;
+ case ATOM_SRC_BYTE0:
+ DEBUG(".[7:0] -> 0x%02X\n", val);
+ break;
+ case ATOM_SRC_BYTE8:
+ DEBUG(".[15:8] -> 0x%02X\n", val);
+ break;
+ case ATOM_SRC_BYTE16:
+ DEBUG(".[23:16] -> 0x%02X\n", val);
+ break;
+ case ATOM_SRC_BYTE24:
+ DEBUG(".[31:24] -> 0x%02X\n", val);
+ break;
+ }
+ return val;
+}
+
+static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr)
+{
+ uint32_t align = (attr >> 3) & 7, arg = attr & 7;
+ switch (arg) {
+ case ATOM_ARG_REG:
+ case ATOM_ARG_ID:
+ (*ptr) += 2;
+ break;
+ case ATOM_ARG_PLL:
+ case ATOM_ARG_MC:
+ case ATOM_ARG_PS:
+ case ATOM_ARG_WS:
+ case ATOM_ARG_FB:
+ (*ptr)++;
+ break;
+ case ATOM_ARG_IMM:
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ (*ptr) += 4;
+ return;
+ case ATOM_SRC_WORD0:
+ case ATOM_SRC_WORD8:
+ case ATOM_SRC_WORD16:
+ (*ptr) += 2;
+ return;
+ case ATOM_SRC_BYTE0:
+ case ATOM_SRC_BYTE8:
+ case ATOM_SRC_BYTE16:
+ case ATOM_SRC_BYTE24:
+ (*ptr)++;
+ return;
+ }
+ }
+}
+
+static uint32_t atom_get_src(atom_exec_context *ctx, uint8_t attr, int *ptr)
+{
+ return atom_get_src_int(ctx, attr, ptr, NULL, 1);
+}
+
+static uint32_t atom_get_src_direct(atom_exec_context *ctx, uint8_t align, int *ptr)
+{
+ uint32_t val = 0xCDCDCDCD;
+
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ val = U32(*ptr);
+ (*ptr) += 4;
+ break;
+ case ATOM_SRC_WORD0:
+ case ATOM_SRC_WORD8:
+ case ATOM_SRC_WORD16:
+ val = U16(*ptr);
+ (*ptr) += 2;
+ break;
+ case ATOM_SRC_BYTE0:
+ case ATOM_SRC_BYTE8:
+ case ATOM_SRC_BYTE16:
+ case ATOM_SRC_BYTE24:
+ val = U8(*ptr);
+ (*ptr)++;
+ break;
+ }
+ return val;
+}
+
+static uint32_t atom_get_dst(atom_exec_context *ctx, int arg, uint8_t attr,
+ int *ptr, uint32_t *saved, int print)
+{
+ return atom_get_src_int(ctx,
+ arg | atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3] << 3,
+ ptr, saved, print);
+}
+
+static void atom_skip_dst(atom_exec_context *ctx, int arg, uint8_t attr, int *ptr)
+{
+ atom_skip_src_int(ctx,
+ arg | atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3] << 3,
+ ptr);
+}
+
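+/*
+ * Write @val back to a destination operand, merging it into the bits of
+ * @saved that lie outside the destination's alignment field.
+ */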
+static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr,
+ int *ptr, uint32_t val, uint32_t saved)
+{
+ uint32_t align = atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3];
+ uint32_t old_val = val, idx;
+ struct atom_context *gctx = ctx->ctx;
+ old_val &= atom_arg_mask[align] >> atom_arg_shift[align];
+ val <<= atom_arg_shift[align];
+ val &= atom_arg_mask[align];
+ saved &= ~atom_arg_mask[align];
+ val |= saved;
+ switch (arg) {
+ case ATOM_ARG_REG:
+ idx = U16(*ptr);
+ (*ptr) += 2;
+ DEBUG("REG[0x%04X]", idx);
+ idx += gctx->reg_block;
+ switch (gctx->io_mode) {
+ case ATOM_IO_MM:
+ if (idx == 0)
+ gctx->card->reg_write(gctx->card, idx,
+ val << 2);
+ else
+ gctx->card->reg_write(gctx->card, idx, val);
+ break;
+ case ATOM_IO_PCI:
+ pr_info("PCI registers are not implemented\n");
+ return;
+ case ATOM_IO_SYSIO:
+ pr_info("SYSIO registers are not implemented\n");
+ return;
+ default:
+ if (!(gctx->io_mode & 0x80)) {
+ pr_info("Bad IO mode\n");
+ return;
+ }
+ if (!gctx->iio[gctx->io_mode & 0xFF]) {
+ pr_info("Undefined indirect IO write method %d\n",
+ gctx->io_mode & 0x7F);
+ return;
+ }
+ atom_iio_execute(gctx, gctx->iio[gctx->io_mode & 0xFF],
+ idx, val);
+ }
+ break;
+ case ATOM_ARG_PS:
+ idx = U8(*ptr);
+ (*ptr)++;
+ DEBUG("PS[0x%02X]", idx);
+ if (idx >= ctx->ps_size) {
+ pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size);
+ return;
+ }
+ ctx->ps[idx] = cpu_to_le32(val);
+ break;
+ case ATOM_ARG_WS:
+ idx = U8(*ptr);
+ (*ptr)++;
+ DEBUG("WS[0x%02X]", idx);
+ switch (idx) {
+ case ATOM_WS_QUOTIENT:
+ gctx->divmul[0] = val;
+ break;
+ case ATOM_WS_REMAINDER:
+ gctx->divmul[1] = val;
+ break;
+ case ATOM_WS_DATAPTR:
+ gctx->data_block = val;
+ break;
+ case ATOM_WS_SHIFT:
+ gctx->shift = val;
+ break;
+ case ATOM_WS_OR_MASK:
+ case ATOM_WS_AND_MASK:
+ break;
+ case ATOM_WS_FB_WINDOW:
+ gctx->fb_base = val;
+ break;
+ case ATOM_WS_ATTRIBUTES:
+ gctx->io_attr = val;
+ break;
+ case ATOM_WS_REGPTR:
+ gctx->reg_block = val;
+ break;
+ default:
+ if (idx >= ctx->ws_size) {
+ pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size);
+ return;
+ }
+ ctx->ws[idx] = val;
+ }
+ break;
+ case ATOM_ARG_FB:
+ idx = U8(*ptr);
+ (*ptr)++;
+ if ((gctx->fb_base + (idx * 4)) > gctx->scratch_size_bytes) {
+ DRM_ERROR("ATOM: fb write beyond scratch region: %d vs. %d\n",
+ gctx->fb_base + (idx * 4), gctx->scratch_size_bytes);
+ } else
+ gctx->scratch[(gctx->fb_base / 4) + idx] = val;
+ DEBUG("FB[0x%02X]", idx);
+ break;
+ case ATOM_ARG_PLL:
+ idx = U8(*ptr);
+ (*ptr)++;
+ DEBUG("PLL[0x%02X]", idx);
+ gctx->card->pll_write(gctx->card, idx, val);
+ break;
+ case ATOM_ARG_MC:
+ idx = U8(*ptr);
+ (*ptr)++;
+ DEBUG("MC[0x%02X]", idx);
+ gctx->card->mc_write(gctx->card, idx, val);
+ return;
+ }
+ switch (align) {
+ case ATOM_SRC_DWORD:
+ DEBUG(".[31:0] <- 0x%08X\n", old_val);
+ break;
+ case ATOM_SRC_WORD0:
+ DEBUG(".[15:0] <- 0x%04X\n", old_val);
+ break;
+ case ATOM_SRC_WORD8:
+ DEBUG(".[23:8] <- 0x%04X\n", old_val);
+ break;
+ case ATOM_SRC_WORD16:
+ DEBUG(".[31:16] <- 0x%04X\n", old_val);
+ break;
+ case ATOM_SRC_BYTE0:
+ DEBUG(".[7:0] <- 0x%02X\n", old_val);
+ break;
+ case ATOM_SRC_BYTE8:
+ DEBUG(".[15:8] <- 0x%02X\n", old_val);
+ break;
+ case ATOM_SRC_BYTE16:
+ DEBUG(".[23:16] <- 0x%02X\n", old_val);
+ break;
+ case ATOM_SRC_BYTE24:
+ DEBUG(".[31:24] <- 0x%02X\n", old_val);
+ break;
+ }
+}
+
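+/*
+ * Opcode handlers. Each one decodes its operands with the helpers above,
+ * performs the operation and, where applicable, writes the result back and
+ * updates the cs_equal/cs_above comparison flags.
+ */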
+static void atom_op_add(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst += src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_and(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst &= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_beep(atom_exec_context *ctx, int *ptr, int arg)
+{
+ printk("ATOM BIOS beeped!\n");
+}
+
+static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg)
+{
+ int idx = U8((*ptr)++);
+ int r = 0;
+
+ if (idx < ATOM_TABLE_NAMES_CNT)
+ SDEBUG(" table: %d (%s)\n", idx, atom_table_names[idx]);
+ else
+ SDEBUG(" table: %d\n", idx);
+ if (U16(ctx->ctx->cmd_table + 4 + 2 * idx))
+ r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift, ctx->ps_size - ctx->ps_shift);
+ if (r) {
+ ctx->abort = true;
+ }
+}
+
+static void atom_op_clear(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t saved;
+ int dptr = *ptr;
+ attr &= 0x38;
+ attr |= atom_def_dst[attr >> 3] << 6;
+ atom_get_dst(ctx, arg, attr, ptr, &saved, 0);
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, 0, saved);
+}
+
+static void atom_op_compare(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ ctx->ctx->cs_equal = (dst == src);
+ ctx->ctx->cs_above = (dst > src);
+ SDEBUG(" result: %s %s\n", ctx->ctx->cs_equal ? "EQ" : "NE",
+ ctx->ctx->cs_above ? "GT" : "LE");
+}
+
+static void atom_op_delay(atom_exec_context *ctx, int *ptr, int arg)
+{
+ unsigned count = U8((*ptr)++);
+ SDEBUG(" count: %d\n", count);
+ if (arg == ATOM_UNIT_MICROSEC)
+ udelay(count);
+ else if (!drm_can_sleep())
+ mdelay(count);
+ else
+ msleep(count);
+}
+
+static void atom_op_div(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ if (src != 0) {
+ ctx->ctx->divmul[0] = dst / src;
+ ctx->ctx->divmul[1] = dst % src;
+ } else {
+ ctx->ctx->divmul[0] = 0;
+ ctx->ctx->divmul[1] = 0;
+ }
+}
+
+static void atom_op_div32(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint64_t val64;
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ if (src != 0) {
+ val64 = dst;
+ val64 |= ((uint64_t)ctx->ctx->divmul[1]) << 32;
+ do_div(val64, src);
+ ctx->ctx->divmul[0] = lower_32_bits(val64);
+ ctx->ctx->divmul[1] = upper_32_bits(val64);
+ } else {
+ ctx->ctx->divmul[0] = 0;
+ ctx->ctx->divmul[1] = 0;
+ }
+}
+
+static void atom_op_eot(atom_exec_context *ctx, int *ptr, int arg)
+{
+ /* functionally, a nop */
+}
+
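+/*
+ * Conditional jump with a loop watchdog: if the same target keeps being
+ * taken for more than ATOM_CMD_TIMEOUT_SEC seconds, the table is aborted
+ * instead of spinning forever on a broken BIOS.
+ */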
+static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
+{
+ int execute = 0, target = U16(*ptr);
+ unsigned long cjiffies;
+
+ (*ptr) += 2;
+ switch (arg) {
+ case ATOM_COND_ABOVE:
+ execute = ctx->ctx->cs_above;
+ break;
+ case ATOM_COND_ABOVEOREQUAL:
+ execute = ctx->ctx->cs_above || ctx->ctx->cs_equal;
+ break;
+ case ATOM_COND_ALWAYS:
+ execute = 1;
+ break;
+ case ATOM_COND_BELOW:
+ execute = !(ctx->ctx->cs_above || ctx->ctx->cs_equal);
+ break;
+ case ATOM_COND_BELOWOREQUAL:
+ execute = !ctx->ctx->cs_above;
+ break;
+ case ATOM_COND_EQUAL:
+ execute = ctx->ctx->cs_equal;
+ break;
+ case ATOM_COND_NOTEQUAL:
+ execute = !ctx->ctx->cs_equal;
+ break;
+ }
+ if (arg != ATOM_COND_ALWAYS)
+ SDEBUG(" taken: %s\n", str_yes_no(execute));
+ SDEBUG(" target: 0x%04X\n", target);
+ if (execute) {
+ if (ctx->last_jump == (ctx->start + target)) {
+ cjiffies = jiffies;
+ if (time_after(cjiffies, ctx->last_jump_jiffies)) {
+ cjiffies -= ctx->last_jump_jiffies;
+ if ((jiffies_to_msecs(cjiffies) > ATOM_CMD_TIMEOUT_SEC*1000)) {
+ DRM_ERROR("atombios stuck in loop for more than %dsecs aborting\n",
+ ATOM_CMD_TIMEOUT_SEC);
+ ctx->abort = true;
+ }
+ } else {
+ /* jiffies wrapped around; just wait a little longer */
+ ctx->last_jump_jiffies = jiffies;
+ }
+ } else {
+ ctx->last_jump = ctx->start + target;
+ ctx->last_jump_jiffies = jiffies;
+ }
+ *ptr = ctx->start + target;
+ }
+}
+
+static void atom_op_mask(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, mask, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ mask = atom_get_src_direct(ctx, ((attr >> 3) & 7), ptr);
+ SDEBUG(" mask: 0x%08x", mask);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst &= mask;
+ dst |= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_move(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t src, saved;
+ int dptr = *ptr;
+ if (((attr >> 3) & 7) != ATOM_SRC_DWORD)
+ atom_get_dst(ctx, arg, attr, ptr, &saved, 0);
+ else {
+ atom_skip_dst(ctx, arg, attr, ptr);
+ saved = 0xCDCDCDCD;
+ }
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, src, saved);
+}
+
+static void atom_op_mul(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ ctx->ctx->divmul[0] = dst * src;
+}
+
+static void atom_op_mul32(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint64_t val64;
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ val64 = (uint64_t)dst * (uint64_t)src;
+ ctx->ctx->divmul[0] = lower_32_bits(val64);
+ ctx->ctx->divmul[1] = upper_32_bits(val64);
+}
+
+static void atom_op_nop(atom_exec_context *ctx, int *ptr, int arg)
+{
+ /* nothing */
+}
+
+static void atom_op_or(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst |= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_postcard(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t val = U8((*ptr)++);
+ SDEBUG("POST card output: 0x%02X\n", val);
+}
+
+static void atom_op_repeat(atom_exec_context *ctx, int *ptr, int arg)
+{
+ pr_info("unimplemented!\n");
+}
+
+static void atom_op_restorereg(atom_exec_context *ctx, int *ptr, int arg)
+{
+ pr_info("unimplemented!\n");
+}
+
+static void atom_op_savereg(atom_exec_context *ctx, int *ptr, int arg)
+{
+ pr_info("unimplemented!\n");
+}
+
+static void atom_op_setdatablock(atom_exec_context *ctx, int *ptr, int arg)
+{
+ int idx = U8(*ptr);
+ (*ptr)++;
+ SDEBUG(" block: %d\n", idx);
+ if (!idx)
+ ctx->ctx->data_block = 0;
+ else if (idx == 255)
+ ctx->ctx->data_block = ctx->start;
+ else
+ ctx->ctx->data_block = U16(ctx->ctx->data_table + 4 + 2 * idx);
+ SDEBUG(" base: 0x%04X\n", ctx->ctx->data_block);
+}
+
+static void atom_op_setfbbase(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ SDEBUG(" fb_base: ");
+ ctx->ctx->fb_base = atom_get_src(ctx, attr, ptr);
+}
+
+static void atom_op_setport(atom_exec_context *ctx, int *ptr, int arg)
+{
+ int port;
+ switch (arg) {
+ case ATOM_PORT_ATI:
+ port = U16(*ptr);
+ if (port < ATOM_IO_NAMES_CNT)
+ SDEBUG(" port: %d (%s)\n", port, atom_io_names[port]);
+ else
+ SDEBUG(" port: %d\n", port);
+ if (!port)
+ ctx->ctx->io_mode = ATOM_IO_MM;
+ else
+ ctx->ctx->io_mode = ATOM_IO_IIO | port;
+ (*ptr) += 2;
+ break;
+ case ATOM_PORT_PCI:
+ ctx->ctx->io_mode = ATOM_IO_PCI;
+ (*ptr)++;
+ break;
+ case ATOM_PORT_SYSIO:
+ ctx->ctx->io_mode = ATOM_IO_SYSIO;
+ (*ptr)++;
+ break;
+ }
+}
+
+static void atom_op_setregblock(atom_exec_context *ctx, int *ptr, int arg)
+{
+ ctx->ctx->reg_block = U16(*ptr);
+ (*ptr) += 2;
+ SDEBUG(" base: 0x%04X\n", ctx->ctx->reg_block);
+}
+
+static void atom_op_shift_left(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++), shift;
+ uint32_t saved, dst;
+ int dptr = *ptr;
+ attr &= 0x38;
+ attr |= atom_def_dst[attr >> 3] << 6;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ shift = atom_get_src_direct(ctx, ATOM_SRC_BYTE0, ptr);
+ SDEBUG(" shift: %d\n", shift);
+ dst <<= shift;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_shift_right(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++), shift;
+ uint32_t saved, dst;
+ int dptr = *ptr;
+ attr &= 0x38;
+ attr |= atom_def_dst[attr >> 3] << 6;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ shift = atom_get_src_direct(ctx, ATOM_SRC_BYTE0, ptr);
+ SDEBUG(" shift: %d\n", shift);
+ dst >>= shift;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_shl(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++), shift;
+ uint32_t saved, dst;
+ int dptr = *ptr;
+ uint32_t dst_align = atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3];
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ /* op needs the full dst value */
+ dst = saved;
+ shift = atom_get_src(ctx, attr, ptr);
+ SDEBUG(" shift: %d\n", shift);
+ dst <<= shift;
+ dst &= atom_arg_mask[dst_align];
+ dst >>= atom_arg_shift[dst_align];
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_shr(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++), shift;
+ uint32_t saved, dst;
+ int dptr = *ptr;
+ uint32_t dst_align = atom_dst_to_src[(attr >> 3) & 7][(attr >> 6) & 3];
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ /* op needs the full dst value */
+ dst = saved;
+ shift = atom_get_src(ctx, attr, ptr);
+ SDEBUG(" shift: %d\n", shift);
+ dst >>= shift;
+ dst &= atom_arg_mask[dst_align];
+ dst >>= atom_arg_shift[dst_align];
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_sub(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst -= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_switch(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t src, val, target;
+ SDEBUG(" switch: ");
+ src = atom_get_src(ctx, attr, ptr);
+ while (U16(*ptr) != ATOM_CASE_END)
+ if (U8(*ptr) == ATOM_CASE_MAGIC) {
+ (*ptr)++;
+ SDEBUG(" case: ");
+ val =
+ atom_get_src(ctx, (attr & 0x38) | ATOM_ARG_IMM,
+ ptr);
+ target = U16(*ptr);
+ if (val == src) {
+ SDEBUG(" target: %04X\n", target);
+ *ptr = ctx->start + target;
+ return;
+ }
+ (*ptr) += 2;
+ } else {
+ pr_info("Bad case\n");
+ return;
+ }
+ (*ptr) += 2;
+}
+
+static void atom_op_test(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src;
+ SDEBUG(" src1: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, NULL, 1);
+ SDEBUG(" src2: ");
+ src = atom_get_src(ctx, attr, ptr);
+ ctx->ctx->cs_equal = ((dst & src) == 0);
+ SDEBUG(" result: %s\n", ctx->ctx->cs_equal ? "EQ" : "NE");
+}
+
+static void atom_op_xor(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t attr = U8((*ptr)++);
+ uint32_t dst, src, saved;
+ int dptr = *ptr;
+ SDEBUG(" dst: ");
+ dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
+ SDEBUG(" src: ");
+ src = atom_get_src(ctx, attr, ptr);
+ dst ^= src;
+ SDEBUG(" dst: ");
+ atom_put_dst(ctx, arg, attr, &dptr, dst, saved);
+}
+
+static void atom_op_debug(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint8_t val = U8((*ptr)++);
+ SDEBUG("DEBUG output: 0x%02X\n", val);
+}
+
+static void atom_op_processds(atom_exec_context *ctx, int *ptr, int arg)
+{
+ uint16_t val = U16(*ptr);
+ (*ptr) += val + 2;
+ SDEBUG("PROCESSDS output: 0x%02X\n", val);
+}
+
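+/*
+ * Dispatch table indexed by the opcode byte. Entry 0 is unused and
+ * ATOM_OP_EOT terminates table execution.
+ */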
+static struct {
+ void (*func) (atom_exec_context *, int *, int);
+ int arg;
+} opcode_table[ATOM_OP_CNT] = {
+ { NULL, 0 },
+ { atom_op_move, ATOM_ARG_REG },
+ { atom_op_move, ATOM_ARG_PS },
+ { atom_op_move, ATOM_ARG_WS },
+ { atom_op_move, ATOM_ARG_FB },
+ { atom_op_move, ATOM_ARG_PLL },
+ { atom_op_move, ATOM_ARG_MC },
+ { atom_op_and, ATOM_ARG_REG },
+ { atom_op_and, ATOM_ARG_PS },
+ { atom_op_and, ATOM_ARG_WS },
+ { atom_op_and, ATOM_ARG_FB },
+ { atom_op_and, ATOM_ARG_PLL },
+ { atom_op_and, ATOM_ARG_MC },
+ { atom_op_or, ATOM_ARG_REG },
+ { atom_op_or, ATOM_ARG_PS },
+ { atom_op_or, ATOM_ARG_WS },
+ { atom_op_or, ATOM_ARG_FB },
+ { atom_op_or, ATOM_ARG_PLL },
+ { atom_op_or, ATOM_ARG_MC },
+ { atom_op_shift_left, ATOM_ARG_REG },
+ { atom_op_shift_left, ATOM_ARG_PS },
+ { atom_op_shift_left, ATOM_ARG_WS },
+ { atom_op_shift_left, ATOM_ARG_FB },
+ { atom_op_shift_left, ATOM_ARG_PLL },
+ { atom_op_shift_left, ATOM_ARG_MC },
+ { atom_op_shift_right, ATOM_ARG_REG },
+ { atom_op_shift_right, ATOM_ARG_PS },
+ { atom_op_shift_right, ATOM_ARG_WS },
+ { atom_op_shift_right, ATOM_ARG_FB },
+ { atom_op_shift_right, ATOM_ARG_PLL },
+ { atom_op_shift_right, ATOM_ARG_MC },
+ { atom_op_mul, ATOM_ARG_REG },
+ { atom_op_mul, ATOM_ARG_PS },
+ { atom_op_mul, ATOM_ARG_WS },
+ { atom_op_mul, ATOM_ARG_FB },
+ { atom_op_mul, ATOM_ARG_PLL },
+ { atom_op_mul, ATOM_ARG_MC },
+ { atom_op_div, ATOM_ARG_REG },
+ { atom_op_div, ATOM_ARG_PS },
+ { atom_op_div, ATOM_ARG_WS },
+ { atom_op_div, ATOM_ARG_FB },
+ { atom_op_div, ATOM_ARG_PLL },
+ { atom_op_div, ATOM_ARG_MC },
+ { atom_op_add, ATOM_ARG_REG },
+ { atom_op_add, ATOM_ARG_PS },
+ { atom_op_add, ATOM_ARG_WS },
+ { atom_op_add, ATOM_ARG_FB },
+ { atom_op_add, ATOM_ARG_PLL },
+ { atom_op_add, ATOM_ARG_MC },
+ { atom_op_sub, ATOM_ARG_REG },
+ { atom_op_sub, ATOM_ARG_PS },
+ { atom_op_sub, ATOM_ARG_WS },
+ { atom_op_sub, ATOM_ARG_FB },
+ { atom_op_sub, ATOM_ARG_PLL },
+ { atom_op_sub, ATOM_ARG_MC },
+ { atom_op_setport, ATOM_PORT_ATI },
+ { atom_op_setport, ATOM_PORT_PCI },
+ { atom_op_setport, ATOM_PORT_SYSIO },
+ { atom_op_setregblock, 0 },
+ { atom_op_setfbbase, 0 },
+ { atom_op_compare, ATOM_ARG_REG },
+ { atom_op_compare, ATOM_ARG_PS },
+ { atom_op_compare, ATOM_ARG_WS },
+ { atom_op_compare, ATOM_ARG_FB },
+ { atom_op_compare, ATOM_ARG_PLL },
+ { atom_op_compare, ATOM_ARG_MC },
+ { atom_op_switch, 0 },
+ { atom_op_jump, ATOM_COND_ALWAYS },
+ { atom_op_jump, ATOM_COND_EQUAL },
+ { atom_op_jump, ATOM_COND_BELOW },
+ { atom_op_jump, ATOM_COND_ABOVE },
+ { atom_op_jump, ATOM_COND_BELOWOREQUAL },
+ { atom_op_jump, ATOM_COND_ABOVEOREQUAL },
+ { atom_op_jump, ATOM_COND_NOTEQUAL },
+ { atom_op_test, ATOM_ARG_REG },
+ { atom_op_test, ATOM_ARG_PS },
+ { atom_op_test, ATOM_ARG_WS },
+ { atom_op_test, ATOM_ARG_FB },
+ { atom_op_test, ATOM_ARG_PLL },
+ { atom_op_test, ATOM_ARG_MC },
+ { atom_op_delay, ATOM_UNIT_MILLISEC },
+ { atom_op_delay, ATOM_UNIT_MICROSEC },
+ { atom_op_calltable, 0 },
+ { atom_op_repeat, 0 },
+ { atom_op_clear, ATOM_ARG_REG },
+ { atom_op_clear, ATOM_ARG_PS },
+ { atom_op_clear, ATOM_ARG_WS },
+ { atom_op_clear, ATOM_ARG_FB },
+ { atom_op_clear, ATOM_ARG_PLL },
+ { atom_op_clear, ATOM_ARG_MC },
+ { atom_op_nop, 0 },
+ { atom_op_eot, 0 },
+ { atom_op_mask, ATOM_ARG_REG },
+ { atom_op_mask, ATOM_ARG_PS },
+ { atom_op_mask, ATOM_ARG_WS },
+ { atom_op_mask, ATOM_ARG_FB },
+ { atom_op_mask, ATOM_ARG_PLL },
+ { atom_op_mask, ATOM_ARG_MC },
+ { atom_op_postcard, 0 },
+ { atom_op_beep, 0 },
+ { atom_op_savereg, 0 },
+ { atom_op_restorereg, 0 },
+ { atom_op_setdatablock, 0 },
+ { atom_op_xor, ATOM_ARG_REG },
+ { atom_op_xor, ATOM_ARG_PS },
+ { atom_op_xor, ATOM_ARG_WS },
+ { atom_op_xor, ATOM_ARG_FB },
+ { atom_op_xor, ATOM_ARG_PLL },
+ { atom_op_xor, ATOM_ARG_MC },
+ { atom_op_shl, ATOM_ARG_REG },
+ { atom_op_shl, ATOM_ARG_PS },
+ { atom_op_shl, ATOM_ARG_WS },
+ { atom_op_shl, ATOM_ARG_FB },
+ { atom_op_shl, ATOM_ARG_PLL },
+ { atom_op_shl, ATOM_ARG_MC },
+ { atom_op_shr, ATOM_ARG_REG },
+ { atom_op_shr, ATOM_ARG_PS },
+ { atom_op_shr, ATOM_ARG_WS },
+ { atom_op_shr, ATOM_ARG_FB },
+ { atom_op_shr, ATOM_ARG_PLL },
+ { atom_op_shr, ATOM_ARG_MC },
+ { atom_op_debug, 0 },
+ { atom_op_processds, 0 },
+ { atom_op_mul32, ATOM_ARG_PS },
+ { atom_op_mul32, ATOM_ARG_WS },
+ { atom_op_div32, ATOM_ARG_PS },
+ { atom_op_div32, ATOM_ARG_WS },
+};
+
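+/*
+ * Core interpreter: look up command table @index, allocate its workspace,
+ * then execute opcodes until EOT, an unknown opcode or an abort (loop
+ * timeout or a failed nested CALL_TABLE).
+ */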
+static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size)
+{
+ int base = CU16(ctx->cmd_table + 4 + 2 * index);
+ int len, ws, ps, ptr;
+ unsigned char op;
+ atom_exec_context ectx;
+ int ret = 0;
+
+ if (!base)
+ return -EINVAL;
+
+ len = CU16(base + ATOM_CT_SIZE_PTR);
+ ws = CU8(base + ATOM_CT_WS_PTR);
+ ps = CU8(base + ATOM_CT_PS_PTR) & ATOM_CT_PS_MASK;
+ ptr = base + ATOM_CT_CODE_PTR;
+
+ SDEBUG(">> execute %04X (len %d, WS %d, PS %d)\n", base, len, ws, ps);
+
+ ectx.ctx = ctx;
+ ectx.ps_shift = ps / 4;
+ ectx.start = base;
+ ectx.ps = params;
+ ectx.ps_size = params_size;
+ ectx.abort = false;
+ ectx.last_jump = 0;
+ ectx.last_jump_jiffies = 0;
+ if (ws) {
+ ectx.ws = kcalloc(4, ws, GFP_KERNEL);
+ if (!ectx.ws) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ ectx.ws_size = ws;
+ } else {
+ ectx.ws = NULL;
+ ectx.ws_size = 0;
+ }
+
+ debug_depth++;
+ while (1) {
+ op = CU8(ptr++);
+ if (op < ATOM_OP_NAMES_CNT)
+ SDEBUG("%s @ 0x%04X\n", atom_op_names[op], ptr - 1);
+ else
+ SDEBUG("[%d] @ 0x%04X\n", op, ptr - 1);
+ if (ectx.abort) {
+ DRM_ERROR("atombios stuck executing %04X (len %d, WS %d, PS %d) @ 0x%04X\n",
+ base, len, ws, ps, ptr - 1);
+ ret = -EINVAL;
+ goto free;
+ }
+
+ if (op < ATOM_OP_CNT && op > 0)
+ opcode_table[op].func(&ectx, &ptr,
+ opcode_table[op].arg);
+ else
+ break;
+
+ if (op == ATOM_OP_EOT)
+ break;
+ }
+ debug_depth--;
+ SDEBUG("<<\n");
+
+free:
+ if (ws)
+ kfree(ectx.ws);
+ return ret;
+}
+
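+/*
+ * Serialized entry point: resets the interpreter state and runs one command
+ * table. Callers build the parameter-space struct for the table and pass it
+ * by pointer, e.g. (as in atombios_crtc.c below):
+ *
+ * SET_CRTC_OVERSCAN_PS_ALLOCATION args = {};
+ * int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan);
+ *
+ * amdgpu_atom_execute_table(adev->mode_info.atom_context, index,
+ * (uint32_t *)&args, sizeof(args));
+ */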
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size)
+{
+ int r;
+
+ mutex_lock(&ctx->mutex);
+ /* reset data block */
+ ctx->data_block = 0;
+ /* reset reg block */
+ ctx->reg_block = 0;
+ /* reset fb window */
+ ctx->fb_base = 0;
+ /* reset io mode */
+ ctx->io_mode = ATOM_IO_MM;
+ /* reset divmul */
+ ctx->divmul[0] = 0;
+ ctx->divmul[1] = 0;
+ r = amdgpu_atom_execute_table_locked(ctx, index, params, params_size);
+ mutex_unlock(&ctx->mutex);
+ return r;
+}
+
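+/*
+ * atom_iio_len[] gives the encoded length of each IIO opcode so that
+ * atom_index_iio() can walk the IIO data block and record, per port id,
+ * the offset of its program for later dispatch by atom_iio_execute().
+ */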
+static int atom_iio_len[] = { 1, 2, 3, 3, 3, 3, 4, 4, 4, 3 };
+
+static void atom_index_iio(struct atom_context *ctx, int base)
+{
+ ctx->iio = kzalloc(2 * 256, GFP_KERNEL);
+ if (!ctx->iio)
+ return;
+ while (CU8(base) == ATOM_IIO_START) {
+ ctx->iio[CU8(base + 1)] = base + 2;
+ base += 2;
+ while (CU8(base) != ATOM_IIO_END)
+ base += atom_iio_len[CU8(base)];
+ base += 3;
+ }
+}
+
+static void atom_get_vbios_name(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned char str_num;
+ unsigned short off_to_vbios_str;
+ unsigned char *c_ptr;
+ int name_size;
+ int i;
+
+ const char *na = "--N/A--";
+ char *back;
+
+ p_rom = ctx->bios;
+
+ str_num = *(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS);
+ if (str_num != 0) {
+ off_to_vbios_str =
+ *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ c_ptr = (unsigned char *)(p_rom + off_to_vbios_str);
+ } else {
+ /* do not know where to find name */
+ memcpy(ctx->name, na, 7);
+ ctx->name[7] = 0;
+ return;
+ }
+
+ /*
+ * skip the atombios strings, usually 4
+ * 1st is P/N, 2nd is ASIC, 3rd is PCI type, 4th is Memory type
+ */
+ for (i = 0; i < str_num; i++) {
+ while (*c_ptr != 0)
+ c_ptr++;
+ c_ptr++;
+ }
+
+ /* skip the following 2 chars: 0x0D 0x0A */
+ c_ptr += 2;
+
+ name_size = strnlen(c_ptr, STRLEN_LONG - 1);
+ memcpy(ctx->name, c_ptr, name_size);
+ back = ctx->name + name_size;
+ while ((*--back) == ' ')
+ ;
+ *(back + 1) = '\0';
+}
+
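+/*
+ * Reformat the ROM date stamp at OFFSET_TO_VBIOS_DATE into a
+ * "20YY/MM/DD HH:MM" string in ctx->date.
+ */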
+static void atom_get_vbios_date(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned char *date_in_rom;
+
+ p_rom = ctx->bios;
+
+ date_in_rom = p_rom + OFFSET_TO_VBIOS_DATE;
+
+ ctx->date[0] = '2';
+ ctx->date[1] = '0';
+ ctx->date[2] = date_in_rom[6];
+ ctx->date[3] = date_in_rom[7];
+ ctx->date[4] = '/';
+ ctx->date[5] = date_in_rom[0];
+ ctx->date[6] = date_in_rom[1];
+ ctx->date[7] = '/';
+ ctx->date[8] = date_in_rom[3];
+ ctx->date[9] = date_in_rom[4];
+ ctx->date[10] = ' ';
+ ctx->date[11] = date_in_rom[9];
+ ctx->date[12] = date_in_rom[10];
+ ctx->date[13] = date_in_rom[11];
+ ctx->date[14] = date_in_rom[12];
+ ctx->date[15] = date_in_rom[13];
+ ctx->date[16] = '\0';
+}
+
+static unsigned char *atom_find_str_in_rom(struct atom_context *ctx, char *str, int start,
+ int end, int maxlen)
+{
+ unsigned long str_off;
+ unsigned char *p_rom;
+ unsigned short str_len;
+
+ str_off = 0;
+ str_len = strnlen(str, maxlen);
+ p_rom = ctx->bios;
+
+ for (; start <= end; ++start) {
+ for (str_off = 0; str_off < str_len; ++str_off) {
+ if (str[str_off] != *(p_rom + start + str_off))
+ break;
+ }
+
+ if (str_off == str_len || str[str_off] == 0)
+ return p_rom + start;
+ }
+ return NULL;
+}
+
+static void atom_get_vbios_pn(struct atom_context *ctx)
+{
+ unsigned char *p_rom;
+ unsigned short off_to_vbios_str;
+ unsigned char *vbios_str;
+ int count;
+
+ off_to_vbios_str = 0;
+ p_rom = ctx->bios;
+
+ if (*(p_rom + OFFSET_TO_GET_ATOMBIOS_NUMBER_OF_STRINGS) != 0) {
+ off_to_vbios_str =
+ *(unsigned short *)(p_rom + OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ vbios_str = (unsigned char *)(p_rom + off_to_vbios_str);
+ } else {
+ vbios_str = p_rom + OFFSET_TO_VBIOS_PART_NUMBER;
+ }
+
+ if (*vbios_str == 0) {
+ vbios_str = atom_find_str_in_rom(ctx, BIOS_ATOM_PREFIX, 3, 1024, 64);
+ if (vbios_str == NULL)
+ vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1;
+ }
+ OPTIMIZER_HIDE_VAR(vbios_str);
+ if (vbios_str != NULL && *vbios_str == 0)
+ vbios_str++;
+
+ if (vbios_str != NULL) {
+ count = 0;
+ while ((count < BIOS_STRING_LENGTH) && vbios_str[count] >= ' ' &&
+ vbios_str[count] <= 'z') {
+ ctx->vbios_pn[count] = vbios_str[count];
+ count++;
+ }
+
+ ctx->vbios_pn[count] = 0;
+ }
+
+ pr_info("ATOM BIOS: %s\n", ctx->vbios_pn);
+}
+
+static void atom_get_vbios_version(struct atom_context *ctx)
+{
+ unsigned short start = 3, end;
+ unsigned char *vbios_ver;
+ unsigned char *p_rom;
+
+ p_rom = ctx->bios;
+ /* Search from strings offset if it's present */
+ start = *(unsigned short *)(p_rom +
+ OFFSET_TO_GET_ATOMBIOS_STRING_START);
+
+ /* Search till atom rom header start point */
+ end = *(unsigned short *)(p_rom + OFFSET_TO_ATOM_ROM_HEADER_POINTER);
+
+ /* Use hardcoded offsets if the offsets are not populated */
+ if (end <= start) {
+ start = 3;
+ end = 1024;
+ }
+
+ /* find anchor ATOMBIOSBK-AMD */
+ vbios_ver =
+ atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, start, end, 64);
+ if (vbios_ver != NULL) {
+ /* skip ATOMBIOSBK-AMD VER */
+ vbios_ver += 18;
+ memcpy(ctx->vbios_ver_str, vbios_ver, STRLEN_NORMAL);
+ } else {
+ ctx->vbios_ver_str[0] = '\0';
+ }
+}
+
+static void atom_get_vbios_build(struct atom_context *ctx)
+{
+ unsigned char *atom_rom_hdr;
+ unsigned char *str;
+ uint16_t base, len;
+
+ base = CU16(ATOM_ROM_TABLE_PTR);
+ atom_rom_hdr = CSTR(base);
+
+ str = CSTR(CU16(base + ATOM_ROM_CFG_PTR));
+ /* Skip config string */
+ while (str < atom_rom_hdr && *str++)
+ ;
+ /* Skip change list string */
+ while (str < atom_rom_hdr && *str++)
+ ;
+
+ len = min(atom_rom_hdr - str, STRLEN_NORMAL);
+ if (len)
+ strscpy(ctx->build_num, str, len);
+}
+
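+/*
+ * Parse a VBIOS image: verify the BIOS/ATI/ATOM magics, cache the command
+ * and data table offsets, index the IIO programs and pull out the VBIOS
+ * name, part number, date, version and build strings.
+ */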
+struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
+{
+ int base;
+ struct atom_context *ctx =
+ kzalloc(sizeof(struct atom_context), GFP_KERNEL);
+ struct _ATOM_ROM_HEADER *atom_rom_header;
+ struct _ATOM_MASTER_DATA_TABLE *master_table;
+ struct _ATOM_FIRMWARE_INFO *atom_fw_info;
+
+ if (!ctx)
+ return NULL;
+
+ ctx->card = card;
+ ctx->bios = bios;
+
+ if (CU16(0) != ATOM_BIOS_MAGIC) {
+ pr_info("Invalid BIOS magic\n");
+ kfree(ctx);
+ return NULL;
+ }
+ if (strncmp
+ (CSTR(ATOM_ATI_MAGIC_PTR), ATOM_ATI_MAGIC,
+ strlen(ATOM_ATI_MAGIC))) {
+ pr_info("Invalid ATI magic\n");
+ kfree(ctx);
+ return NULL;
+ }
+
+ base = CU16(ATOM_ROM_TABLE_PTR);
+ if (strncmp
+ (CSTR(base + ATOM_ROM_MAGIC_PTR), ATOM_ROM_MAGIC,
+ strlen(ATOM_ROM_MAGIC))) {
+ pr_info("Invalid ATOM magic\n");
+ kfree(ctx);
+ return NULL;
+ }
+
+ ctx->cmd_table = CU16(base + ATOM_ROM_CMD_PTR);
+ ctx->data_table = CU16(base + ATOM_ROM_DATA_PTR);
+ atom_index_iio(ctx, CU16(ctx->data_table + ATOM_DATA_IIO_PTR) + 4);
+ if (!ctx->iio) {
+ amdgpu_atom_destroy(ctx);
+ return NULL;
+ }
+
+ atom_rom_header = (struct _ATOM_ROM_HEADER *)CSTR(base);
+ if (atom_rom_header->usMasterDataTableOffset != 0) {
+ master_table = (struct _ATOM_MASTER_DATA_TABLE *)
+ CSTR(atom_rom_header->usMasterDataTableOffset);
+ if (master_table->ListOfDataTables.FirmwareInfo != 0) {
+ atom_fw_info = (struct _ATOM_FIRMWARE_INFO *)
+ CSTR(master_table->ListOfDataTables.FirmwareInfo);
+ ctx->version = atom_fw_info->ulFirmwareRevision;
+ }
+ }
+
+ atom_get_vbios_name(ctx);
+ atom_get_vbios_pn(ctx);
+ atom_get_vbios_date(ctx);
+ atom_get_vbios_version(ctx);
+ atom_get_vbios_build(ctx);
+
+ return ctx;
+}
+
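+/*
+ * Run the ASIC_Init command table with the default engine and memory clocks
+ * taken from the firmware info data table.
+ */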
+int amdgpu_atom_asic_init(struct atom_context *ctx)
+{
+ int hwi = CU16(ctx->data_table + ATOM_DATA_FWI_PTR);
+ uint32_t ps[16];
+ int ret;
+
+ memset(ps, 0, 64);
+
+ ps[0] = cpu_to_le32(CU32(hwi + ATOM_FWI_DEFSCLK_PTR));
+ ps[1] = cpu_to_le32(CU32(hwi + ATOM_FWI_DEFMCLK_PTR));
+ if (!ps[0] || !ps[1])
+ return 1;
+
+ if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT))
+ return 1;
+ ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps, 16);
+ if (ret)
+ return ret;
+
+ memset(ps, 0, 64);
+
+ return ret;
+}
+
+void amdgpu_atom_destroy(struct atom_context *ctx)
+{
+ kfree(ctx->iio);
+ kfree(ctx);
+}
+
+bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index,
+ uint16_t *size, uint8_t *frev, uint8_t *crev,
+ uint16_t *data_start)
+{
+ int offset = index * 2 + 4;
+ int idx = CU16(ctx->data_table + offset);
+ u16 *mdt = (u16 *)(ctx->bios + ctx->data_table + 4);
+
+ if (!mdt[index])
+ return false;
+
+ if (size)
+ *size = CU16(idx);
+ if (frev)
+ *frev = CU8(idx + 2);
+ if (crev)
+ *crev = CU8(idx + 3);
+ *data_start = idx;
+ return true;
+}
+
+bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t *frev,
+ uint8_t *crev)
+{
+ int offset = index * 2 + 4;
+ int idx = CU16(ctx->cmd_table + offset);
+ u16 *mct = (u16 *)(ctx->bios + ctx->cmd_table + 4);
+
+ if (!mct[index])
+ return false;
+
+ if (frev)
+ *frev = CU8(idx + 2);
+ if (crev)
+ *crev = CU8(idx + 3);
+ return true;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
new file mode 100644
index 000000000000..825ff28731f5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Stanislaw Skowronek
+ */
+
+#ifndef ATOM_H
+#define ATOM_H
+
+#include <linux/types.h>
+
+struct drm_device;
+
+#define ATOM_BIOS_MAGIC 0xAA55
+#define ATOM_ATI_MAGIC_PTR 0x30
+#define ATOM_ATI_MAGIC " 761295520"
+#define ATOM_ROM_TABLE_PTR 0x48
+
+#define ATOM_ROM_MAGIC "ATOM"
+#define ATOM_ROM_MAGIC_PTR 4
+
+#define ATOM_ROM_CFG_PTR 0xC
+#define ATOM_ROM_MSG_PTR 0x10
+#define ATOM_ROM_CMD_PTR 0x1E
+#define ATOM_ROM_DATA_PTR 0x20
+
+#define ATOM_CMD_INIT 0
+#define ATOM_CMD_SETSCLK 0x0A
+#define ATOM_CMD_SETMCLK 0x0B
+#define ATOM_CMD_SETPCLK 0x0C
+#define ATOM_CMD_SPDFANCNTL 0x39
+
+#define ATOM_DATA_FWI_PTR 0xC
+#define ATOM_DATA_IIO_PTR 0x32
+
+#define ATOM_FWI_DEFSCLK_PTR 8
+#define ATOM_FWI_DEFMCLK_PTR 0xC
+#define ATOM_FWI_MAXSCLK_PTR 0x24
+#define ATOM_FWI_MAXMCLK_PTR 0x28
+
+#define ATOM_CT_SIZE_PTR 0
+#define ATOM_CT_WS_PTR 4
+#define ATOM_CT_PS_PTR 5
+#define ATOM_CT_PS_MASK 0x7F
+#define ATOM_CT_CODE_PTR 6
+
+#define ATOM_OP_CNT 127
+#define ATOM_OP_EOT 91
+
+#define ATOM_CASE_MAGIC 0x63
+#define ATOM_CASE_END 0x5A5A
+
+#define ATOM_ARG_REG 0
+#define ATOM_ARG_PS 1
+#define ATOM_ARG_WS 2
+#define ATOM_ARG_FB 3
+#define ATOM_ARG_ID 4
+#define ATOM_ARG_IMM 5
+#define ATOM_ARG_PLL 6
+#define ATOM_ARG_MC 7
+
+#define ATOM_SRC_DWORD 0
+#define ATOM_SRC_WORD0 1
+#define ATOM_SRC_WORD8 2
+#define ATOM_SRC_WORD16 3
+#define ATOM_SRC_BYTE0 4
+#define ATOM_SRC_BYTE8 5
+#define ATOM_SRC_BYTE16 6
+#define ATOM_SRC_BYTE24 7
+
+#define ATOM_WS_QUOTIENT 0x40
+#define ATOM_WS_REMAINDER 0x41
+#define ATOM_WS_DATAPTR 0x42
+#define ATOM_WS_SHIFT 0x43
+#define ATOM_WS_OR_MASK 0x44
+#define ATOM_WS_AND_MASK 0x45
+#define ATOM_WS_FB_WINDOW 0x46
+#define ATOM_WS_ATTRIBUTES 0x47
+#define ATOM_WS_REGPTR 0x48
+
+#define ATOM_IIO_NOP 0
+#define ATOM_IIO_START 1
+#define ATOM_IIO_READ 2
+#define ATOM_IIO_WRITE 3
+#define ATOM_IIO_CLEAR 4
+#define ATOM_IIO_SET 5
+#define ATOM_IIO_MOVE_INDEX 6
+#define ATOM_IIO_MOVE_ATTR 7
+#define ATOM_IIO_MOVE_DATA 8
+#define ATOM_IIO_END 9
+
+#define ATOM_IO_MM 0
+#define ATOM_IO_PCI 1
+#define ATOM_IO_SYSIO 2
+#define ATOM_IO_IIO 0x80
+
+#define STRLEN_NORMAL 32
+#define STRLEN_LONG 64
+#define STRLEN_VERYLONG 254
+
+struct card_info {
+ struct drm_device *dev;
+ void (*reg_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*reg_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+ void (*mc_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*mc_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+ void (*pll_write)(struct card_info *info,
+ u32 reg, uint32_t val); /* filled by driver */
+ uint32_t (*pll_read)(struct card_info *info, uint32_t reg); /* filled by driver */
+};
+
+struct atom_context {
+ struct card_info *card;
+ struct mutex mutex;
+ void *bios;
+ uint32_t cmd_table, data_table;
+ uint16_t *iio;
+
+ uint16_t data_block;
+ uint32_t fb_base;
+ uint32_t divmul[2];
+ uint16_t io_attr;
+ uint16_t reg_block;
+ uint8_t shift;
+ int cs_equal, cs_above;
+ int io_mode;
+ uint32_t *scratch;
+ int scratch_size_bytes;
+
+ uint8_t name[STRLEN_LONG];
+ uint8_t vbios_pn[STRLEN_LONG];
+ uint32_t version;
+ uint8_t vbios_ver_str[STRLEN_NORMAL];
+ uint8_t date[STRLEN_NORMAL];
+ uint8_t build_num[STRLEN_NORMAL];
+};
+
+extern int amdgpu_atom_debug;
+
+struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios);
+int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size);
+int amdgpu_atom_asic_init(struct atom_context *ctx);
+void amdgpu_atom_destroy(struct atom_context *ctx);
+bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size,
+ uint8_t *frev, uint8_t *crev, uint16_t *data_start);
+bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index,
+ uint8_t *frev, uint8_t *crev);
+#include "atom-types.h"
+#include "atombios.h"
+#include "ObjectID.h"
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
new file mode 100644
index 000000000000..3dfc28840a7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -0,0 +1,885 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_fixed.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "atom-bits.h"
+#include "atombios_encoders.h"
+#include "atombios_crtc.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+
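+/*
+ * Program the CRTC overscan borders through the SetCRTC_OverScan command
+ * table. RMX_CENTER centers the mode, RMX_ASPECT pads the shorter axis to
+ * preserve the aspect ratio, and RMX_FULL just applies the border values.
+ */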
+void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ SET_CRTC_OVERSCAN_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_OverScan);
+ int a1, a2;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+
+ switch (amdgpu_crtc->rmx_type) {
+ case RMX_CENTER:
+ args.usOverscanTop = cpu_to_le16((adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2);
+ args.usOverscanBottom = cpu_to_le16((adjusted_mode->crtc_vdisplay - mode->crtc_vdisplay) / 2);
+ args.usOverscanLeft = cpu_to_le16((adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2);
+ args.usOverscanRight = cpu_to_le16((adjusted_mode->crtc_hdisplay - mode->crtc_hdisplay) / 2);
+ break;
+ case RMX_ASPECT:
+ a1 = mode->crtc_vdisplay * adjusted_mode->crtc_hdisplay;
+ a2 = adjusted_mode->crtc_vdisplay * mode->crtc_hdisplay;
+
+ if (a1 > a2) {
+ args.usOverscanLeft = cpu_to_le16((adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2);
+ args.usOverscanRight = cpu_to_le16((adjusted_mode->crtc_hdisplay - (a2 / mode->crtc_vdisplay)) / 2);
+ } else if (a2 > a1) {
+ args.usOverscanTop = cpu_to_le16((adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2);
+ args.usOverscanBottom = cpu_to_le16((adjusted_mode->crtc_vdisplay - (a1 / mode->crtc_hdisplay)) / 2);
+ }
+ break;
+ case RMX_FULL:
+ default:
+ args.usOverscanRight = cpu_to_le16(amdgpu_crtc->h_border);
+ args.usOverscanLeft = cpu_to_le16(amdgpu_crtc->h_border);
+ args.usOverscanBottom = cpu_to_le16(amdgpu_crtc->v_border);
+ args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border);
+ break;
+ }
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ ENABLE_SCALER_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, EnableScaler);
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucScaler = amdgpu_crtc->crtc_id;
+
+ switch (amdgpu_crtc->rmx_type) {
+ case RMX_FULL:
+ args.ucEnable = ATOM_SCALER_EXPANSION;
+ break;
+ case RMX_CENTER:
+ args.ucEnable = ATOM_SCALER_CENTER;
+ break;
+ case RMX_ASPECT:
+ args.ucEnable = ATOM_SCALER_EXPANSION;
+ break;
+ default:
+ args.ucEnable = ATOM_SCALER_DISABLE;
+ break;
+ }
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int index =
+ GetIndexIntoMasterTable(COMMAND, UpdateCRTC_DoubleBufferRegisters);
+ ENABLE_CRTC_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+ args.ucEnable = lock;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int index = GetIndexIntoMasterTable(COMMAND, EnableCRTC);
+ ENABLE_CRTC_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+ args.ucEnable = state;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int index = GetIndexIntoMasterTable(COMMAND, BlankCRTC);
+ BLANK_CRTC_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+ args.ucBlanking = state;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
+ ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucDispPipeId = amdgpu_crtc->crtc_id;
+ args.ucEnable = state;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
+{
+ int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
+ ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
+
+ memset(&args, 0, sizeof(args));
+
+ args.ucEnable = ATOM_INIT;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ SET_CRTC_USING_DTD_TIMING_PARAMETERS args;
+ int index = GetIndexIntoMasterTable(COMMAND, SetCRTC_UsingDTDTiming);
+ u16 misc = 0;
+
+ memset(&args, 0, sizeof(args));
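+ /* translate the DRM crtc timings into the ATOM DTD layout: active
+ * size, blanking time, sync offset and sync width, with any
+ * requested borders folded in
+ */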
+ args.usH_Size = cpu_to_le16(mode->crtc_hdisplay - (amdgpu_crtc->h_border * 2));
+ args.usH_Blanking_Time =
+ cpu_to_le16(mode->crtc_hblank_end - mode->crtc_hdisplay + (amdgpu_crtc->h_border * 2));
+ args.usV_Size = cpu_to_le16(mode->crtc_vdisplay - (amdgpu_crtc->v_border * 2));
+ args.usV_Blanking_Time =
+ cpu_to_le16(mode->crtc_vblank_end - mode->crtc_vdisplay + (amdgpu_crtc->v_border * 2));
+ args.usH_SyncOffset =
+ cpu_to_le16(mode->crtc_hsync_start - mode->crtc_hdisplay + amdgpu_crtc->h_border);
+ args.usH_SyncWidth =
+ cpu_to_le16(mode->crtc_hsync_end - mode->crtc_hsync_start);
+ args.usV_SyncOffset =
+ cpu_to_le16(mode->crtc_vsync_start - mode->crtc_vdisplay + amdgpu_crtc->v_border);
+ args.usV_SyncWidth =
+ cpu_to_le16(mode->crtc_vsync_end - mode->crtc_vsync_start);
+ args.ucH_Border = amdgpu_crtc->h_border;
+ args.ucV_Border = amdgpu_crtc->v_border;
+
+ if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+ misc |= ATOM_VSYNC_POLARITY;
+ if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+ misc |= ATOM_HSYNC_POLARITY;
+ if (mode->flags & DRM_MODE_FLAG_CSYNC)
+ misc |= ATOM_COMPOSITESYNC;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ misc |= ATOM_INTERLACE;
+ if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+ misc |= ATOM_DOUBLE_CLOCK_MODE;
+
+ args.susModeMiscInfo.usAccess = cpu_to_le16(misc);
+ args.ucCRTC = amdgpu_crtc->crtc_id;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+union atom_enable_ss {
+ ENABLE_SPREAD_SPECTRUM_ON_PPLL_PS_ALLOCATION v1;
+ ENABLE_SPREAD_SPECTRUM_ON_PPLL_V2 v2;
+ ENABLE_SPREAD_SPECTRUM_ON_PPLL_V3 v3;
+};
+
+static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev,
+ int enable,
+ int pll_id,
+ int crtc_id,
+ struct amdgpu_atom_ss *ss)
+{
+ unsigned i;
+ int index = GetIndexIntoMasterTable(COMMAND, EnableSpreadSpectrumOnPPLL);
+ union atom_enable_ss args;
+
+ if (enable) {
+ /* Don't mess with SS if the percentage is 0 or the SS is external.
+ * SS has already been disabled, and disabling it again can cause
+ * display problems if the pll is already programmed.
+ */
+ if (ss->percentage == 0)
+ return;
+ if (ss->type & ATOM_EXTERNAL_SS_MASK)
+ return;
+ } else {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != crtc_id &&
+ pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* another crtc is using this pll; don't turn
+ * off spread spectrum as it might turn off
+ * the display on the active crtc
+ */
+ return;
+ }
+ }
+ }
+
+ memset(&args, 0, sizeof(args));
+
+ args.v3.usSpreadSpectrumAmountFrac = cpu_to_le16(0);
+ args.v3.ucSpreadSpectrumType = ss->type & ATOM_SS_CENTRE_SPREAD_MODE_MASK;
+ switch (pll_id) {
+ case ATOM_PPLL1:
+ args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P1PLL;
+ break;
+ case ATOM_PPLL2:
+ args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P2PLL;
+ break;
+ case ATOM_DCPLL:
+ args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_DCPLL;
+ break;
+ case ATOM_PPLL_INVALID:
+ return;
+ }
+ args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount);
+ args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step);
+ args.v3.ucEnable = enable;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+union adjust_pixel_clock {
+ ADJUST_DISPLAY_PLL_PS_ALLOCATION v1;
+ ADJUST_DISPLAY_PLL_PS_ALLOCATION_V3 v3;
+};
+
+static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_encoder *encoder = amdgpu_crtc->encoder;
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ u32 adjusted_clock = mode->clock;
+ int encoder_mode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ u32 dp_clock = mode->clock;
+ u32 clock = mode->clock;
+ int bpc = amdgpu_crtc->bpc;
+ bool is_duallink = amdgpu_dig_monitor_is_duallink(encoder, mode->clock);
+ union adjust_pixel_clock args;
+ u8 frev, crev;
+ int index;
+
+ amdgpu_crtc->pll_flags = AMDGPU_PLL_USE_FRAC_FB_DIV;
+
+ if ((amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE)) {
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ dp_clock = dig_connector->dp_clock;
+ }
+ }
+
+ /* use recommended ref_div for ss */
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ if (amdgpu_crtc->ss_enabled) {
+ if (amdgpu_crtc->ss.refdiv) {
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_REF_DIV;
+ amdgpu_crtc->pll_reference_div = amdgpu_crtc->ss.refdiv;
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
+ }
+ }
+ }
+
+ /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */
+ if (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1)
+ adjusted_clock = mode->clock * 2;
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_PREFER_CLOSEST_LOWER;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_IS_LCD;
+
+ /* adjust pll for deep color modes */
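+ /* HDMI deep color raises the TMDS clock relative to the pixel
+ * clock: 10 bpc -> 5/4, 12 bpc -> 3/2, 16 bpc -> 2x
+ */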
+ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+ switch (bpc) {
+ case 8:
+ default:
+ break;
+ case 10:
+ clock = (clock * 5) / 4;
+ break;
+ case 12:
+ clock = (clock * 3) / 2;
+ break;
+ case 16:
+ clock = clock * 2;
+ break;
+ }
+ }
+
+ /* DCE3+ has an AdjustDisplayPll that will adjust the pixel clock
+ * accordingly based on the encoder/transmitter to work around
+ * special hw requirements.
+ */
+ index = GetIndexIntoMasterTable(COMMAND, AdjustDisplayPll);
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
+ &crev))
+ return adjusted_clock;
+
+ memset(&args, 0, sizeof(args));
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ case 2:
+ args.v1.usPixelClock = cpu_to_le16(clock / 10);
+ args.v1.ucTransmitterID = amdgpu_encoder->encoder_id;
+ args.v1.ucEncodeMode = encoder_mode;
+ if (amdgpu_crtc->ss_enabled && amdgpu_crtc->ss.percentage)
+ args.v1.ucConfig |=
+ ADJUST_DISPLAY_CONFIG_SS_ENABLE;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args));
+ adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10;
+ break;
+ case 3:
+ args.v3.sInput.usPixelClock = cpu_to_le16(clock / 10);
+ args.v3.sInput.ucTransmitterID = amdgpu_encoder->encoder_id;
+ args.v3.sInput.ucEncodeMode = encoder_mode;
+ args.v3.sInput.ucDispPllConfig = 0;
+ if (amdgpu_crtc->ss_enabled && amdgpu_crtc->ss.percentage)
+ args.v3.sInput.ucDispPllConfig |=
+ DISPPLL_CONFIG_SS_ENABLE;
+ if (ENCODER_MODE_IS_DP(encoder_mode)) {
+ args.v3.sInput.ucDispPllConfig |=
+ DISPPLL_CONFIG_COHERENT_MODE;
+ /* DP link clock in 10 kHz units: 16200 or 27000 */
+ args.v3.sInput.usPixelClock = cpu_to_le16(dp_clock / 10);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig->coherent_mode)
+ args.v3.sInput.ucDispPllConfig |=
+ DISPPLL_CONFIG_COHERENT_MODE;
+ if (is_duallink)
+ args.v3.sInput.ucDispPllConfig |=
+ DISPPLL_CONFIG_DUAL_LINK;
+ }
+ if (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)
+ args.v3.sInput.ucExtTransmitterID =
+ amdgpu_encoder_get_dp_bridge_encoder_id(encoder);
+ else
+ args.v3.sInput.ucExtTransmitterID = 0;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context,
+ index, (uint32_t *)&args, sizeof(args));
+ adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
+ if (args.v3.sOutput.ucRefDiv) {
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_REF_DIV;
+ amdgpu_crtc->pll_reference_div = args.v3.sOutput.ucRefDiv;
+ }
+ if (args.v3.sOutput.ucPostDiv) {
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV;
+ amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_POST_DIV;
+ amdgpu_crtc->pll_post_div = args.v3.sOutput.ucPostDiv;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return adjusted_clock;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return adjusted_clock;
+ }
+
+ return adjusted_clock;
+}
+
+union set_pixel_clock {
+ SET_PIXEL_CLOCK_PS_ALLOCATION base;
+ PIXEL_CLOCK_PARAMETERS v1;
+ PIXEL_CLOCK_PARAMETERS_V2 v2;
+ PIXEL_CLOCK_PARAMETERS_V3 v3;
+ PIXEL_CLOCK_PARAMETERS_V5 v5;
+ PIXEL_CLOCK_PARAMETERS_V6 v6;
+ PIXEL_CLOCK_PARAMETERS_V7 v7;
+};
+
+/* on DCE5, make sure the voltage is high enough to support the
+ * required disp clk.
+ */
+void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
+ u32 dispclk)
+{
+ u8 frev, crev;
+ int index;
+ union set_pixel_clock args;
+
+ memset(&args, 0, sizeof(args));
+
+ index = GetIndexIntoMasterTable(COMMAND, SetPixelClock);
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
+ &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 5:
+ /* if the default dcpll clock is specified,
+ * SetPixelClock provides the dividers
+ */
+ args.v5.ucCRTC = ATOM_CRTC_INVALID;
+ args.v5.usPixelClock = cpu_to_le16(dispclk);
+ args.v5.ucPpll = ATOM_DCPLL;
+ break;
+ case 6:
+ /* if the default dcpll clock is specified,
+ * SetPixelClock provides the dividers
+ */
+ args.v6.ulDispEngClkFreq = cpu_to_le32(dispclk);
+ if (adev->asic_type == CHIP_TAHITI ||
+ adev->asic_type == CHIP_PITCAIRN ||
+ adev->asic_type == CHIP_VERDE ||
+ adev->asic_type == CHIP_OLAND)
+ args.v6.ucPpll = ATOM_PPLL0;
+ else
+ args.v6.ucPpll = ATOM_EXT_PLL1;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+ }
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+union set_dce_clock {
+ SET_DCE_CLOCK_PS_ALLOCATION_V1_1 v1_1;
+ SET_DCE_CLOCK_PS_ALLOCATION_V2_1 v2_1;
+};
+
+u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
+ u32 freq, u8 clk_type, u8 clk_src)
+{
+ u8 frev, crev;
+ int index;
+ union set_dce_clock args;
+ u32 ret_freq = 0;
+
+ memset(&args, 0, sizeof(args));
+
+ index = GetIndexIntoMasterTable(COMMAND, SetDCEClock);
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
+ &crev))
+ return 0;
+
+ switch (frev) {
+ case 2:
+ switch (crev) {
+ case 1:
+ args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */
+ args.v2_1.asParam.ucDCEClkType = clk_type;
+ args.v2_1.asParam.ucDCEClkSrc = clk_src;
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+ ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return 0;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return 0;
+ }
+
+ return ret_freq;
+}
+
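+/* Report whether the pixel clock is driven by the PPLL itself: for DP
+ * this is only the case for the internal PPLLs (below ATOM_EXT_PLL1);
+ * all other encoder modes always source the pixel clock from the PPLL.
+ */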
+static bool is_pixel_clock_source_from_pll(u32 encoder_mode, int pll_id)
+{
+ if (ENCODER_MODE_IS_DP(encoder_mode)) {
+ if (pll_id < ATOM_EXT_PLL1)
+ return true;
+ else
+ return false;
+ } else {
+ return true;
+ }
+}
+
+void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
+ u32 crtc_id,
+ int pll_id,
+ u32 encoder_mode,
+ u32 encoder_id,
+ u32 clock,
+ u32 ref_div,
+ u32 fb_div,
+ u32 frac_fb_div,
+ u32 post_div,
+ int bpc,
+ bool ss_enabled,
+ struct amdgpu_atom_ss *ss)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u8 frev, crev;
+ int index = GetIndexIntoMasterTable(COMMAND, SetPixelClock);
+ union set_pixel_clock args;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev,
+ &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ if (clock == ATOM_DISABLE)
+ return;
+ args.v1.usPixelClock = cpu_to_le16(clock / 10);
+ args.v1.usRefDiv = cpu_to_le16(ref_div);
+ args.v1.usFbDiv = cpu_to_le16(fb_div);
+ args.v1.ucFracFbDiv = frac_fb_div;
+ args.v1.ucPostDiv = post_div;
+ args.v1.ucPpll = pll_id;
+ args.v1.ucCRTC = crtc_id;
+ args.v1.ucRefDivSrc = 1;
+ break;
+ case 2:
+ args.v2.usPixelClock = cpu_to_le16(clock / 10);
+ args.v2.usRefDiv = cpu_to_le16(ref_div);
+ args.v2.usFbDiv = cpu_to_le16(fb_div);
+ args.v2.ucFracFbDiv = frac_fb_div;
+ args.v2.ucPostDiv = post_div;
+ args.v2.ucPpll = pll_id;
+ args.v2.ucCRTC = crtc_id;
+ args.v2.ucRefDivSrc = 1;
+ break;
+ case 3:
+ args.v3.usPixelClock = cpu_to_le16(clock / 10);
+ args.v3.usRefDiv = cpu_to_le16(ref_div);
+ args.v3.usFbDiv = cpu_to_le16(fb_div);
+ args.v3.ucFracFbDiv = frac_fb_div;
+ args.v3.ucPostDiv = post_div;
+ args.v3.ucPpll = pll_id;
+ if (crtc_id == ATOM_CRTC2)
+ args.v3.ucMiscInfo = PIXEL_CLOCK_MISC_CRTC_SEL_CRTC2;
+ else
+ args.v3.ucMiscInfo = PIXEL_CLOCK_MISC_CRTC_SEL_CRTC1;
+ if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK))
+ args.v3.ucMiscInfo |= PIXEL_CLOCK_MISC_REF_DIV_SRC;
+ args.v3.ucTransmitterId = encoder_id;
+ args.v3.ucEncoderMode = encoder_mode;
+ break;
+ case 5:
+ args.v5.ucCRTC = crtc_id;
+ args.v5.usPixelClock = cpu_to_le16(clock / 10);
+ args.v5.ucRefDiv = ref_div;
+ args.v5.usFbDiv = cpu_to_le16(fb_div);
+ args.v5.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000);
+ args.v5.ucPostDiv = post_div;
+ args.v5.ucMiscInfo = 0; /* HDMI depth, etc. */
+ if ((ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK)) &&
+ (pll_id < ATOM_EXT_PLL1))
+ args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_REF_DIV_SRC;
+ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+ switch (bpc) {
+ case 8:
+ default:
+ args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_24BPP;
+ break;
+ case 10:
+ /* yes this is correct, the atom define is wrong */
+ args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_32BPP;
+ break;
+ case 12:
+ /* yes this is correct, the atom define is wrong */
+ args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_30BPP;
+ break;
+ }
+ }
+ args.v5.ucTransmitterID = encoder_id;
+ args.v5.ucEncoderMode = encoder_mode;
+ args.v5.ucPpll = pll_id;
+ break;
+ case 6:
+ args.v6.ulDispEngClkFreq = cpu_to_le32(crtc_id << 24 | clock / 10);
+ args.v6.ucRefDiv = ref_div;
+ args.v6.usFbDiv = cpu_to_le16(fb_div);
+ args.v6.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000);
+ args.v6.ucPostDiv = post_div;
+ args.v6.ucMiscInfo = 0; /* HDMI depth, etc. */
+ if ((ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK)) &&
+ (pll_id < ATOM_EXT_PLL1) &&
+ !is_pixel_clock_source_from_pll(encoder_mode, pll_id))
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_REF_DIV_SRC;
+ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+ switch (bpc) {
+ case 8:
+ default:
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_24BPP;
+ break;
+ case 10:
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_30BPP_V6;
+ break;
+ case 12:
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_36BPP_V6;
+ break;
+ case 16:
+ args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_48BPP;
+ break;
+ }
+ }
+ args.v6.ucTransmitterID = encoder_id;
+ args.v6.ucEncoderMode = encoder_mode;
+ args.v6.ucPpll = pll_id;
+ break;
+ case 7:
+ args.v7.ulPixelClock = cpu_to_le32(clock * 10); /* 100 Hz units */
+ args.v7.ucMiscInfo = 0;
+ if ((encoder_mode == ATOM_ENCODER_MODE_DVI) &&
+ (clock > 165000))
+ args.v7.ucMiscInfo |= PIXEL_CLOCK_V7_MISC_DVI_DUALLINK_EN;
+ args.v7.ucCRTC = crtc_id;
+ if (encoder_mode == ATOM_ENCODER_MODE_HDMI) {
+ switch (bpc) {
+ case 8:
+ default:
+ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_DIS;
+ break;
+ case 10:
+ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_5_4;
+ break;
+ case 12:
+ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_3_2;
+ break;
+ case 16:
+ args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_2_1;
+ break;
+ }
+ }
+ args.v7.ucTransmitterID = encoder_id;
+ args.v7.ucEncoderMode = encoder_mode;
+ args.v7.ucPpll = pll_id;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+ return;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder =
+ to_amdgpu_encoder(amdgpu_crtc->encoder);
+ int encoder_mode = amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
+
+ amdgpu_crtc->bpc = 8;
+ amdgpu_crtc->ss_enabled = false;
+
+ if ((amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(amdgpu_crtc->encoder) != ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector =
+ amdgpu_get_connector_for_encoder(amdgpu_crtc->encoder);
+ struct amdgpu_connector *amdgpu_connector =
+ to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+ int dp_clock;
+
+ /* Assign mode clock for hdmi deep color max clock limit check */
+ amdgpu_connector->pixelclock_for_modeset = mode->clock;
+ amdgpu_crtc->bpc = amdgpu_connector_get_monitor_bpc(connector);
+
+ switch (encoder_mode) {
+ case ATOM_ENCODER_MODE_DP_MST:
+ case ATOM_ENCODER_MODE_DP:
+ /* DP/eDP */
+ dp_clock = dig_connector->dp_clock / 10;
+ amdgpu_crtc->ss_enabled =
+ amdgpu_atombios_get_asic_ss_info(adev, &amdgpu_crtc->ss,
+ ASIC_INTERNAL_SS_ON_DP,
+ dp_clock);
+ break;
+ case ATOM_ENCODER_MODE_LVDS:
+ amdgpu_crtc->ss_enabled =
+ amdgpu_atombios_get_asic_ss_info(adev,
+ &amdgpu_crtc->ss,
+ dig->lcd_ss_id,
+ mode->clock / 10);
+ break;
+ case ATOM_ENCODER_MODE_DVI:
+ amdgpu_crtc->ss_enabled =
+ amdgpu_atombios_get_asic_ss_info(adev,
+ &amdgpu_crtc->ss,
+ ASIC_INTERNAL_SS_ON_TMDS,
+ mode->clock / 10);
+ break;
+ case ATOM_ENCODER_MODE_HDMI:
+ amdgpu_crtc->ss_enabled =
+ amdgpu_atombios_get_asic_ss_info(adev,
+ &amdgpu_crtc->ss,
+ ASIC_INTERNAL_SS_ON_HDMI,
+ mode->clock / 10);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* adjust pixel clock as needed */
+ amdgpu_crtc->adjusted_clock = amdgpu_atombios_crtc_adjust_pll(crtc, mode);
+
+ return 0;
+}
+
+void amdgpu_atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder =
+ to_amdgpu_encoder(amdgpu_crtc->encoder);
+ u32 pll_clock = mode->clock;
+ u32 clock = mode->clock;
+ u32 ref_div = 0, fb_div = 0, frac_fb_div = 0, post_div = 0;
+ struct amdgpu_pll *pll;
+ int encoder_mode = amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder);
+
+ /* pass the actual clock to amdgpu_atombios_crtc_program_pll for HDMI */
+ if ((encoder_mode == ATOM_ENCODER_MODE_HDMI) &&
+ (amdgpu_crtc->bpc > 8))
+ clock = amdgpu_crtc->adjusted_clock;
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL1:
+ pll = &adev->clock.ppll[0];
+ break;
+ case ATOM_PPLL2:
+ pll = &adev->clock.ppll[1];
+ break;
+ case ATOM_PPLL0:
+ case ATOM_PPLL_INVALID:
+ default:
+ pll = &adev->clock.ppll[2];
+ break;
+ }
+
+ /* update pll params */
+ pll->flags = amdgpu_crtc->pll_flags;
+ pll->reference_div = amdgpu_crtc->pll_reference_div;
+ pll->post_div = amdgpu_crtc->pll_post_div;
+
+ amdgpu_pll_compute(adev, pll, amdgpu_crtc->adjusted_clock, &pll_clock,
+ &fb_div, &frac_fb_div, &ref_div, &post_div);
+
+ amdgpu_atombios_crtc_program_ss(adev, ATOM_DISABLE, amdgpu_crtc->pll_id,
+ amdgpu_crtc->crtc_id, &amdgpu_crtc->ss);
+
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ encoder_mode, amdgpu_encoder->encoder_id, clock,
+ ref_div, fb_div, frac_fb_div, post_div,
+ amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss);
+
+ if (amdgpu_crtc->ss_enabled) {
+ /* calculate ss amount and step size */
+ u32 step_size;
+ u32 amount = (((fb_div * 10) + frac_fb_div) *
+ (u32)amdgpu_crtc->ss.percentage) /
+ (100 * (u32)amdgpu_crtc->ss.percentage_divider);
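+ /* pack the spread amount (feedback divider scaled by the ss
+ * percentage) into the FBDIV and NFRAC fields
+ */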
+ amdgpu_crtc->ss.amount = (amount / 10) & ATOM_PPLL_SS_AMOUNT_V2_FBDIV_MASK;
+ amdgpu_crtc->ss.amount |= ((amount - (amount / 10)) << ATOM_PPLL_SS_AMOUNT_V2_NFRAC_SHIFT) &
+ ATOM_PPLL_SS_AMOUNT_V2_NFRAC_MASK;
+ if (amdgpu_crtc->ss.type & ATOM_PPLL_SS_TYPE_V2_CENTRE_SPREAD)
+ step_size = (4 * amount * ref_div * ((u32)amdgpu_crtc->ss.rate * 2048)) /
+ (125 * 25 * pll->reference_freq / 100);
+ else
+ step_size = (2 * amount * ref_div * ((u32)amdgpu_crtc->ss.rate * 2048)) /
+ (125 * 25 * pll->reference_freq / 100);
+ amdgpu_crtc->ss.step = step_size;
+
+ amdgpu_atombios_crtc_program_ss(adev, ATOM_ENABLE, amdgpu_crtc->pll_id,
+ amdgpu_crtc->crtc_id, &amdgpu_crtc->ss);
+ }
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h
new file mode 100644
index 000000000000..0eeda8e3bf5c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ATOMBIOS_CRTC_H__
+#define __ATOMBIOS_CRTC_H__
+
+void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);
+void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc);
+void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock);
+void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state);
+void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state);
+void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state);
+void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev);
+void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc,
+ struct drm_display_mode *mode);
+void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev,
+ u32 dispclk);
+u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev,
+ u32 freq, u8 clk_type, u8 clk_src);
+void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc,
+ u32 crtc_id,
+ int pll_id,
+ u32 encoder_mode,
+ u32 encoder_id,
+ u32 clock,
+ u32 ref_div,
+ u32 fb_div,
+ u32 frac_fb_div,
+ u32 post_div,
+ int bpc,
+ bool ss_enabled,
+ struct amdgpu_atom_ss *ss);
+int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc,
+ struct drm_display_mode *mode);
+void amdgpu_atombios_crtc_set_pll(struct drm_crtc *crtc,
+ struct drm_display_mode *mode);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
new file mode 100644
index 000000000000..492813ab1b54
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -0,0 +1,770 @@
+/*
+ * Copyright 2007-8 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ * Jerome Glisse
+ */
+
+#include <drm/amdgpu_drm.h>
+#include <drm/display/drm_dp_helper.h>
+
+#include "amdgpu.h"
+
+#include "atom.h"
+#include "atom-bits.h"
+#include "atombios_encoders.h"
+#include "atombios_dp.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_atombios.h"
+
+/* move these to drm_dp_helper.c/h */
+#define DP_LINK_CONFIGURATION_SIZE 9
+#define DP_DPCD_SIZE DP_RECEIVER_CAP_SIZE
+
+static char *voltage_names[] = {
+ "0.4V", "0.6V", "0.8V", "1.2V"
+};
+static char *pre_emph_names[] = {
+ "0dB", "3.5dB", "6dB", "9.5dB"
+};
+
+/***** amdgpu AUX functions *****/
+
+union aux_channel_transaction {
+ PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1;
+ PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2;
+};
+
+static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan,
+ u8 *send, int send_bytes,
+ u8 *recv, int recv_size,
+ u8 delay, u8 *ack)
+{
+ struct drm_device *dev = chan->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union aux_channel_transaction args;
+ int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction);
+ unsigned char *base;
+ int recv_bytes;
+ int r = 0;
+
+ memset(&args, 0, sizeof(args));
+
+ mutex_lock(&chan->mutex);
+
+ base = (unsigned char *)(adev->mode_info.atom_context->scratch + 1);
+
+ amdgpu_atombios_copy_swap(base, send, send_bytes, true);
+
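+ /* lpAuxRequest and lpDataOut are byte offsets into the ATOM scratch
+ * area: the request was copied to offset 4 (base above) and the
+ * reply data comes back at offset 20 (base + 16)
+ */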
+ args.v2.lpAuxRequest = cpu_to_le16((u16)(0 + 4));
+ args.v2.lpDataOut = cpu_to_le16((u16)(16 + 4));
+ args.v2.ucDataOutLen = 0;
+ args.v2.ucChannelID = chan->rec.i2c_id;
+ args.v2.ucDelay = delay / 10;
+ args.v2.ucHPD_ID = chan->rec.hpd;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+
+ *ack = args.v2.ucReplyStatus;
+
+ /* timeout */
+ if (args.v2.ucReplyStatus == 1) {
+ r = -ETIMEDOUT;
+ goto done;
+ }
+
+ /* flags not zero */
+ if (args.v2.ucReplyStatus == 2) {
+ DRM_DEBUG_KMS("dp_aux_ch flags not zero\n");
+ r = -EIO;
+ goto done;
+ }
+
+ /* error */
+ if (args.v2.ucReplyStatus == 3) {
+ DRM_DEBUG_KMS("dp_aux_ch error\n");
+ r = -EIO;
+ goto done;
+ }
+
+ recv_bytes = args.v1.ucDataOutLen;
+ if (recv_bytes > recv_size)
+ recv_bytes = recv_size;
+
+ if (recv && recv_size)
+ amdgpu_atombios_copy_swap(recv, base + 16, recv_bytes, false);
+
+ r = recv_bytes;
+done:
+ mutex_unlock(&chan->mutex);
+
+ return r;
+}
+
+#define BARE_ADDRESS_SIZE 3
+#define HEADER_SIZE (BARE_ADDRESS_SIZE + 1)
+
+static ssize_t
+amdgpu_atombios_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
+{
+ struct amdgpu_i2c_chan *chan =
+ container_of(aux, struct amdgpu_i2c_chan, aux);
+ int ret;
+ u8 tx_buf[20];
+ size_t tx_size;
+ u8 ack, delay = 0;
+
+ if (WARN_ON(msg->size > 16))
+ return -E2BIG;
+
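+ /* build the AUX header: 20-bit address across bytes 0-2, request
+ * code in the high nibble of byte 2 and (length - 1) in the low
+ * nibble of byte 3
+ */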
+ tx_buf[0] = msg->address & 0xff;
+ tx_buf[1] = msg->address >> 8;
+ tx_buf[2] = (msg->request << 4) |
+ ((msg->address >> 16) & 0xf);
+ tx_buf[3] = msg->size ? (msg->size - 1) : 0;
+
+ switch (msg->request & ~DP_AUX_I2C_MOT) {
+ case DP_AUX_NATIVE_WRITE:
+ case DP_AUX_I2C_WRITE:
+ /* tx_size needs to be 4 even for bare address packets since the atom
+ * table needs the info in tx_buf[3].
+ */
+ tx_size = HEADER_SIZE + msg->size;
+ if (msg->size == 0)
+ tx_buf[3] |= BARE_ADDRESS_SIZE << 4;
+ else
+ tx_buf[3] |= tx_size << 4;
+ memcpy(tx_buf + HEADER_SIZE, msg->buffer, msg->size);
+ ret = amdgpu_atombios_dp_process_aux_ch(chan,
+ tx_buf, tx_size, NULL, 0, delay, &ack);
+ if (ret >= 0)
+ /* Return payload size. */
+ ret = msg->size;
+ break;
+ case DP_AUX_NATIVE_READ:
+ case DP_AUX_I2C_READ:
+ /* tx_size needs to be 4 even for bare address packets since the atom
+ * table needs the info in tx_buf[3].
+ */
+ tx_size = HEADER_SIZE;
+ if (msg->size == 0)
+ tx_buf[3] |= BARE_ADDRESS_SIZE << 4;
+ else
+ tx_buf[3] |= tx_size << 4;
+ ret = amdgpu_atombios_dp_process_aux_ch(chan,
+ tx_buf, tx_size, msg->buffer, msg->size, delay, &ack);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret >= 0)
+ msg->reply = ack >> 4;
+
+ return ret;
+}
+
+void amdgpu_atombios_dp_aux_init(struct amdgpu_connector *amdgpu_connector)
+{
+ amdgpu_connector->ddc_bus->rec.hpd = amdgpu_connector->hpd.hpd;
+ amdgpu_connector->ddc_bus->aux.transfer = amdgpu_atombios_dp_aux_transfer;
+ amdgpu_connector->ddc_bus->aux.drm_dev = amdgpu_connector->base.dev;
+
+ drm_dp_aux_init(&amdgpu_connector->ddc_bus->aux);
+ amdgpu_connector->ddc_bus->has_aux = true;
+}
+
+/***** general DP utility functions *****/
+
+#define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_LEVEL_3
+#define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPH_LEVEL_3
+
+static void amdgpu_atombios_dp_get_adjust_train(const u8 link_status[DP_LINK_STATUS_SIZE],
+ int lane_count,
+ u8 train_set[4])
+{
+ u8 v = 0;
+ u8 p = 0;
+ int lane;
+
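+ /* take the highest voltage swing and pre-emphasis requested on any
+ * lane, and flag when the maximum supported level is reached
+ */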
+ for (lane = 0; lane < lane_count; lane++) {
+ u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
+ u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
+
+ DRM_DEBUG_KMS("requested signal parameters: lane %d voltage %s pre_emph %s\n",
+ lane,
+ voltage_names[this_v >> DP_TRAIN_VOLTAGE_SWING_SHIFT],
+ pre_emph_names[this_p >> DP_TRAIN_PRE_EMPHASIS_SHIFT]);
+
+ if (this_v > v)
+ v = this_v;
+ if (this_p > p)
+ p = this_p;
+ }
+
+ if (v >= DP_VOLTAGE_MAX)
+ v |= DP_TRAIN_MAX_SWING_REACHED;
+
+ if (p >= DP_PRE_EMPHASIS_MAX)
+ p |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
+
+ DRM_DEBUG_KMS("using signal parameters: voltage %s pre_emph %s\n",
+ voltage_names[(v & DP_TRAIN_VOLTAGE_SWING_MASK) >> DP_TRAIN_VOLTAGE_SWING_SHIFT],
+ pre_emph_names[(p & DP_TRAIN_PRE_EMPHASIS_MASK) >> DP_TRAIN_PRE_EMPHASIS_SHIFT]);
+
+ for (lane = 0; lane < 4; lane++)
+ train_set[lane] = v | p;
+}
+
+/* convert bits per color (bpc, as read from the EDID) to bits per pixel */
+static unsigned amdgpu_atombios_dp_convert_bpc_to_bpp(int bpc)
+{
+ if (bpc == 0)
+ return 24;
+ else
+ return bpc * 3;
+}
+
+/***** amdgpu specific DP functions *****/
+
+static int amdgpu_atombios_dp_get_dp_link_config(struct drm_connector *connector,
+ const u8 dpcd[DP_DPCD_SIZE],
+ unsigned pix_clock,
+ unsigned *dp_lanes, unsigned *dp_rate)
+{
+ unsigned bpp =
+ amdgpu_atombios_dp_convert_bpc_to_bpp(amdgpu_connector_get_monitor_bpc(connector));
+ static const unsigned link_rates[3] = { 162000, 270000, 540000 };
+ unsigned max_link_rate = drm_dp_max_link_rate(dpcd);
+ unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
+ unsigned lane_num, i, max_pix_clock;
+
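+ /* walk link rates from lowest to highest and lane counts from fewest
+ * to most, returning the first combination whose bandwidth covers the
+ * pixel clock; the NUTMEG bridge path only varies the lane count at a
+ * fixed 270000 link rate
+ */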
+ if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+ ENCODER_OBJECT_ID_NUTMEG) {
+ for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+ max_pix_clock = (lane_num * 270000 * 8) / bpp;
+ if (max_pix_clock >= pix_clock) {
+ *dp_lanes = lane_num;
+ *dp_rate = 270000;
+ return 0;
+ }
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+ for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+ max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+ if (max_pix_clock >= pix_clock) {
+ *dp_lanes = lane_num;
+ *dp_rate = link_rates[i];
+ return 0;
+ }
+ }
+ }
+ }
+
+ return -EINVAL;
+}
+
+static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev,
+ int action, int dp_clock,
+ u8 ucconfig, u8 lane_num)
+{
+ DP_ENCODER_SERVICE_PARAMETERS args;
+ int index = GetIndexIntoMasterTable(COMMAND, DPEncoderService);
+
+ memset(&args, 0, sizeof(args));
+ args.ucLinkClock = dp_clock / 10;
+ args.ucConfig = ucconfig;
+ args.ucAction = action;
+ args.ucLaneNum = lane_num;
+ args.ucStatus = 0;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+ return args.ucStatus;
+}
+
+u8 amdgpu_atombios_dp_get_sinktype(struct amdgpu_connector *amdgpu_connector)
+{
+ struct drm_device *dev = amdgpu_connector->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ return amdgpu_atombios_dp_encoder_service(adev, ATOM_DP_ACTION_GET_SINK_TYPE, 0,
+ amdgpu_connector->ddc_bus->rec.i2c_id, 0);
+}
+
+static void amdgpu_atombios_dp_probe_oui(struct amdgpu_connector *amdgpu_connector)
+{
+ struct amdgpu_connector_atom_dig *dig_connector = amdgpu_connector->con_priv;
+ u8 buf[3];
+
+ if (!(dig_connector->dpcd[DP_DOWN_STREAM_PORT_COUNT] & DP_OUI_SUPPORT))
+ return;
+
+ if (drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux, DP_SINK_OUI, buf, 3) == 3)
+ DRM_DEBUG_KMS("Sink OUI: %02hx%02hx%02hx\n",
+ buf[0], buf[1], buf[2]);
+
+ if (drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux, DP_BRANCH_OUI, buf, 3) == 3)
+ DRM_DEBUG_KMS("Branch OUI: %02hx%02hx%02hx\n",
+ buf[0], buf[1], buf[2]);
+}
+
+static void amdgpu_atombios_dp_ds_ports(struct amdgpu_connector *amdgpu_connector)
+{
+ struct amdgpu_connector_atom_dig *dig_connector = amdgpu_connector->con_priv;
+ int ret;
+
+ if (dig_connector->dpcd[DP_DPCD_REV] > 0x10) {
+ ret = drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux,
+ DP_DOWNSTREAM_PORT_0,
+ dig_connector->downstream_ports,
+ DP_MAX_DOWNSTREAM_PORTS);
+ if (ret)
+ memset(dig_connector->downstream_ports, 0,
+ DP_MAX_DOWNSTREAM_PORTS);
+ }
+}
+
+int amdgpu_atombios_dp_get_dpcd(struct amdgpu_connector *amdgpu_connector)
+{
+ struct amdgpu_connector_atom_dig *dig_connector = amdgpu_connector->con_priv;
+ u8 msg[DP_DPCD_SIZE];
+ int ret;
+
+ ret = drm_dp_dpcd_read(&amdgpu_connector->ddc_bus->aux, DP_DPCD_REV,
+ msg, DP_DPCD_SIZE);
+ if (ret == DP_DPCD_SIZE) {
+ memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE);
+
+ DRM_DEBUG_KMS("DPCD: %*ph\n", (int)sizeof(dig_connector->dpcd),
+ dig_connector->dpcd);
+
+ amdgpu_atombios_dp_probe_oui(amdgpu_connector);
+ amdgpu_atombios_dp_ds_ports(amdgpu_connector);
+ return 0;
+ }
+
+ dig_connector->dpcd[0] = 0;
+ return -EINVAL;
+}
+
+int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+ u16 dp_bridge = amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector);
+ u8 tmp;
+
+ if (!amdgpu_connector->con_priv)
+ return panel_mode;
+
+ if (dp_bridge != ENCODER_OBJECT_ID_NONE) {
+ /* DP bridge chips */
+ if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux,
+ DP_EDP_CONFIGURATION_CAP, &tmp) == 1) {
+ if (tmp & 1)
+ panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE;
+ else if ((dp_bridge == ENCODER_OBJECT_ID_NUTMEG) ||
+ (dp_bridge == ENCODER_OBJECT_ID_TRAVIS))
+ panel_mode = DP_PANEL_MODE_INTERNAL_DP1_MODE;
+ else
+ panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+ }
+ } else if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ /* eDP */
+ if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux,
+ DP_EDP_CONFIGURATION_CAP, &tmp) == 1) {
+ if (tmp & 1)
+ panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE;
+ }
+ }
+
+ return panel_mode;
+}
+
+void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector;
+ int ret;
+
+ if (!amdgpu_connector->con_priv)
+ return;
+ dig_connector = amdgpu_connector->con_priv;
+
+ if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
+ ret = amdgpu_atombios_dp_get_dp_link_config(connector, dig_connector->dpcd,
+ mode->clock,
+ &dig_connector->dp_lane_count,
+ &dig_connector->dp_clock);
+ if (ret) {
+ dig_connector->dp_clock = 0;
+ dig_connector->dp_lane_count = 0;
+ }
+ }
+}
+
+int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector;
+ unsigned dp_lanes, dp_clock;
+ int ret;
+
+ if (!amdgpu_connector->con_priv)
+ return MODE_CLOCK_HIGH;
+ dig_connector = amdgpu_connector->con_priv;
+
+ ret = amdgpu_atombios_dp_get_dp_link_config(connector, dig_connector->dpcd,
+ mode->clock, &dp_lanes, &dp_clock);
+ if (ret)
+ return MODE_CLOCK_HIGH;
+
+ if ((dp_clock == 540000) &&
+ (!amdgpu_connector_is_dp12_capable(connector)))
+ return MODE_CLOCK_HIGH;
+
+ return MODE_OK;
+}
+
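+/* Report that link training is needed only when the link status can be
+ * read and channel equalization is no longer ok.
+ */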
+bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector)
+{
+ u8 link_status[DP_LINK_STATUS_SIZE];
+ struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
+
+ if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux,
+ link_status) < 0)
+ return false;
+ if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
+ return false;
+ return true;
+}
+
+void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector,
+ u8 power_state)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector;
+
+ if (!amdgpu_connector->con_priv)
+ return;
+
+ dig_connector = amdgpu_connector->con_priv;
+
+ /* power up/down the sink */
+ if (dig_connector->dpcd[0] >= 0x11) {
+ drm_dp_dpcd_writeb(&amdgpu_connector->ddc_bus->aux,
+ DP_SET_POWER, power_state);
+ usleep_range(1000, 2000);
+ }
+}
+
+struct amdgpu_atombios_dp_link_train_info {
+ struct amdgpu_device *adev;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ int dp_clock;
+ int dp_lane_count;
+ bool tp3_supported;
+ u8 dpcd[DP_RECEIVER_CAP_SIZE];
+ u8 train_set[4];
+ u8 link_status[DP_LINK_STATUS_SIZE];
+ u8 tries;
+ struct drm_dp_aux *aux;
+};
+
+static void
+amdgpu_atombios_dp_update_vs_emph(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ /* set the initial vs/emph on the source */
+ amdgpu_atombios_encoder_setup_dig_transmitter(dp_info->encoder,
+ ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH,
+ 0, dp_info->train_set[0]); /* sets all lanes at once */
+
+ /* set the vs/emph on the sink */
+ drm_dp_dpcd_write(dp_info->aux, DP_TRAINING_LANE0_SET,
+ dp_info->train_set, dp_info->dp_lane_count);
+}
+
+static void
+amdgpu_atombios_dp_set_tp(struct amdgpu_atombios_dp_link_train_info *dp_info, int tp)
+{
+ int rtp = 0;
+
+ /* set training pattern on the source */
+ switch (tp) {
+ case DP_TRAINING_PATTERN_1:
+ rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1;
+ break;
+ case DP_TRAINING_PATTERN_2:
+ rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2;
+ break;
+ case DP_TRAINING_PATTERN_3:
+ rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN3;
+ break;
+ }
+ amdgpu_atombios_encoder_setup_dig_encoder(dp_info->encoder, rtp, 0);
+
+ /* enable training pattern on the sink */
+ drm_dp_dpcd_writeb(dp_info->aux, DP_TRAINING_PATTERN_SET, tp);
+}
+
+static int
+amdgpu_atombios_dp_link_train_init(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(dp_info->encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u8 tmp;
+
+ /* power up the sink */
+ amdgpu_atombios_dp_set_rx_power_state(dp_info->connector, DP_SET_POWER_D0);
+
+ /* possibly enable downspread on the sink */
+ if (dp_info->dpcd[3] & 0x1)
+ drm_dp_dpcd_writeb(dp_info->aux,
+ DP_DOWNSPREAD_CTRL, DP_SPREAD_AMP_0_5);
+ else
+ drm_dp_dpcd_writeb(dp_info->aux,
+ DP_DOWNSPREAD_CTRL, 0);
+
+ if (dig->panel_mode == DP_PANEL_MODE_INTERNAL_DP2_MODE)
+ drm_dp_dpcd_writeb(dp_info->aux, DP_EDP_CONFIGURATION_SET, 1);
+
+ /* set the lane count on the sink */
+ tmp = dp_info->dp_lane_count;
+ if (drm_dp_enhanced_frame_cap(dp_info->dpcd))
+ tmp |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+ drm_dp_dpcd_writeb(dp_info->aux, DP_LANE_COUNT_SET, tmp);
+
+ /* set the link rate on the sink */
+ tmp = drm_dp_link_rate_to_bw_code(dp_info->dp_clock);
+ drm_dp_dpcd_writeb(dp_info->aux, DP_LINK_BW_SET, tmp);
+
+ /* start training on the source */
+ amdgpu_atombios_encoder_setup_dig_encoder(dp_info->encoder,
+ ATOM_ENCODER_CMD_DP_LINK_TRAINING_START, 0);
+
+ /* disable the training pattern on the sink */
+ drm_dp_dpcd_writeb(dp_info->aux,
+ DP_TRAINING_PATTERN_SET,
+ DP_TRAINING_PATTERN_DISABLE);
+
+ return 0;
+}
+
+static int
+amdgpu_atombios_dp_link_train_finish(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ udelay(400);
+
+ /* disable the training pattern on the sink */
+ drm_dp_dpcd_writeb(dp_info->aux,
+ DP_TRAINING_PATTERN_SET,
+ DP_TRAINING_PATTERN_DISABLE);
+
+ /* disable the training pattern on the source */
+ amdgpu_atombios_encoder_setup_dig_encoder(dp_info->encoder,
+ ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE, 0);
+
+ return 0;
+}
+
+static int
+amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ bool clock_recovery;
+ u8 voltage;
+ int i;
+
+ amdgpu_atombios_dp_set_tp(dp_info, DP_TRAINING_PATTERN_1);
+ memset(dp_info->train_set, 0, 4);
+ amdgpu_atombios_dp_update_vs_emph(dp_info);
+
+ udelay(400);
+
+ /* clock recovery loop */
+ clock_recovery = false;
+ dp_info->tries = 0;
+ voltage = 0xff;
+ while (1) {
+ drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd);
+
+ if (drm_dp_dpcd_read_link_status(dp_info->aux,
+ dp_info->link_status) < 0) {
+ DRM_ERROR("displayport link status failed\n");
+ break;
+ }
+
+ if (drm_dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
+ clock_recovery = true;
+ break;
+ }
+
+ for (i = 0; i < dp_info->dp_lane_count; i++) {
+ if ((dp_info->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
+ break;
+ }
+ if (i == dp_info->dp_lane_count) {
+ DRM_ERROR("clock recovery reached max voltage\n");
+ break;
+ }
+
+ if ((dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) {
+ ++dp_info->tries;
+ if (dp_info->tries == 5) {
+ DRM_ERROR("clock recovery tried 5 times\n");
+ break;
+ }
+ } else
+ dp_info->tries = 0;
+
+ voltage = dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+
+ /* Compute new train_set as requested by sink */
+ amdgpu_atombios_dp_get_adjust_train(dp_info->link_status, dp_info->dp_lane_count,
+ dp_info->train_set);
+
+ amdgpu_atombios_dp_update_vs_emph(dp_info);
+ }
+ if (!clock_recovery) {
+ DRM_ERROR("clock recovery failed\n");
+ return -1;
+ } else {
+ DRM_DEBUG_KMS("clock recovery at voltage %d pre-emphasis %d\n",
+ dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK,
+ (dp_info->train_set[0] & DP_TRAIN_PRE_EMPHASIS_MASK) >>
+ DP_TRAIN_PRE_EMPHASIS_SHIFT);
+ return 0;
+ }
+}
+
+static int
+amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_info)
+{
+ bool channel_eq;
+
+ if (dp_info->tp3_supported)
+ amdgpu_atombios_dp_set_tp(dp_info, DP_TRAINING_PATTERN_3);
+ else
+ amdgpu_atombios_dp_set_tp(dp_info, DP_TRAINING_PATTERN_2);
+
+ /* channel equalization loop */
+ dp_info->tries = 0;
+ channel_eq = false;
+ while (1) {
+ drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd);
+
+ if (drm_dp_dpcd_read_link_status(dp_info->aux,
+ dp_info->link_status) < 0) {
+ DRM_ERROR("displayport link status failed\n");
+ break;
+ }
+
+ if (drm_dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
+ channel_eq = true;
+ break;
+ }
+
+ /* Try 5 times */
+ if (dp_info->tries > 5) {
+ DRM_ERROR("channel eq failed: 5 tries\n");
+ break;
+ }
+
+ /* Compute new train_set as requested by sink */
+ amdgpu_atombios_dp_get_adjust_train(dp_info->link_status, dp_info->dp_lane_count,
+ dp_info->train_set);
+
+ amdgpu_atombios_dp_update_vs_emph(dp_info);
+ dp_info->tries++;
+ }
+
+ if (!channel_eq) {
+ DRM_ERROR("channel eq failed\n");
+ return -1;
+ } else {
+ DRM_DEBUG_KMS("channel eq at voltage %d pre-emphasis %d\n",
+ dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK,
+ (dp_info->train_set[0] & DP_TRAIN_PRE_EMPHASIS_MASK)
+ >> DP_TRAIN_PRE_EMPHASIS_SHIFT);
+ return 0;
+ }
+}
+
+void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector;
+ struct amdgpu_connector_atom_dig *dig_connector;
+ struct amdgpu_atombios_dp_link_train_info dp_info;
+ u8 tmp;
+
+ if (!amdgpu_encoder->enc_priv)
+ return;
+
+ amdgpu_connector = to_amdgpu_connector(connector);
+ if (!amdgpu_connector->con_priv)
+ return;
+ dig_connector = amdgpu_connector->con_priv;
+
+ if ((dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT) &&
+ (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_eDP))
+ return;
+
+ if (drm_dp_dpcd_readb(&amdgpu_connector->ddc_bus->aux, DP_MAX_LANE_COUNT, &tmp)
+ == 1) {
+ if (tmp & DP_TPS3_SUPPORTED)
+ dp_info.tp3_supported = true;
+ else
+ dp_info.tp3_supported = false;
+ } else {
+ dp_info.tp3_supported = false;
+ }
+
+ memcpy(dp_info.dpcd, dig_connector->dpcd, DP_RECEIVER_CAP_SIZE);
+ dp_info.adev = adev;
+ dp_info.encoder = encoder;
+ dp_info.connector = connector;
+ dp_info.dp_lane_count = dig_connector->dp_lane_count;
+ dp_info.dp_clock = dig_connector->dp_clock;
+ dp_info.aux = &amdgpu_connector->ddc_bus->aux;
+
+ if (amdgpu_atombios_dp_link_train_init(&dp_info))
+ goto done;
+ if (amdgpu_atombios_dp_link_train_cr(&dp_info))
+ goto done;
+ if (amdgpu_atombios_dp_link_train_ce(&dp_info))
+ goto done;
+done:
+ if (amdgpu_atombios_dp_link_train_finish(&dp_info))
+ return;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
new file mode 100644
index 000000000000..3e24acf8133f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ATOMBIOS_DP_H__
+#define __ATOMBIOS_DP_H__
+
+void amdgpu_atombios_dp_aux_init(struct amdgpu_connector *amdgpu_connector);
+u8 amdgpu_atombios_dp_get_sinktype(struct amdgpu_connector *amdgpu_connector);
+int amdgpu_atombios_dp_get_dpcd(struct amdgpu_connector *amdgpu_connector);
+int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
+ struct drm_connector *connector);
+void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
+ const struct drm_display_mode *mode);
+int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
+ const struct drm_display_mode *mode);
+bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector);
+void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector,
+ u8 power_state);
+void amdgpu_atombios_dp_link_train(struct drm_encoder *encoder,
+ struct drm_connector *connector);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
new file mode 100644
index 000000000000..a51f3414b65d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -0,0 +1,2126 @@
+/*
+ * Copyright 2007-11 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ * Alex Deucher
+ */
+
+#include <linux/pci.h>
+
+#include <acpi/video.h>
+
+#include <drm/drm_edid.h>
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "atom.h"
+#include "atombios_encoders.h"
+#include "atombios_dp.h"
+#include <linux/backlight.h>
+#include "bif/bif_4_1_d.h"
+
+u8
+amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
+{
+ u8 backlight_level;
+ u32 bios_2_scratch;
+
+ bios_2_scratch = RREG32(mmBIOS_SCRATCH_2);
+
+ backlight_level = ((bios_2_scratch & ATOM_S2_CURRENT_BL_LEVEL_MASK) >>
+ ATOM_S2_CURRENT_BL_LEVEL_SHIFT);
+
+ return backlight_level;
+}
+
+void
+amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
+ u8 backlight_level)
+{
+ u32 bios_2_scratch;
+
+ bios_2_scratch = RREG32(mmBIOS_SCRATCH_2);
+
+ bios_2_scratch &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK;
+ bios_2_scratch |= ((backlight_level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT) &
+ ATOM_S2_CURRENT_BL_LEVEL_MASK);
+
+ WREG32(mmBIOS_SCRATCH_2, bios_2_scratch);
+}
+
+u8
+amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder)
+{
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
+ return 0;
+
+ return amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+}
+
+void
+amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder,
+ u8 level)
+{
+ struct drm_encoder *encoder = &amdgpu_encoder->base;
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder_atom_dig *dig;
+
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
+ return;
+
+ if ((amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) &&
+ amdgpu_encoder->enc_priv) {
+ dig = amdgpu_encoder->enc_priv;
+ dig->backlight_level = level;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, dig->backlight_level);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (dig->backlight_level == 0)
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
+ else {
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_BL_BRIGHTNESS_CONTROL, 0, 0);
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd)
+{
+ u8 level;
+
+ /* Convert brightness to hardware level */
+ if (bd->props.brightness < 0)
+ level = 0;
+ else if (bd->props.brightness > AMDGPU_MAX_BL_LEVEL)
+ level = AMDGPU_MAX_BL_LEVEL;
+ else
+ level = bd->props.brightness;
+
+ return level;
+}
+
+static int amdgpu_atombios_encoder_update_backlight_status(struct backlight_device *bd)
+{
+ struct amdgpu_backlight_privdata *pdata = bl_get_data(bd);
+ struct amdgpu_encoder *amdgpu_encoder = pdata->encoder;
+
+ amdgpu_atombios_encoder_set_backlight_level(amdgpu_encoder,
+ amdgpu_atombios_encoder_backlight_level(bd));
+
+ return 0;
+}
+
+static int
+amdgpu_atombios_encoder_get_backlight_brightness(struct backlight_device *bd)
+{
+ struct amdgpu_backlight_privdata *pdata = bl_get_data(bd);
+ struct amdgpu_encoder *amdgpu_encoder = pdata->encoder;
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ return amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+}
+
+static const struct backlight_ops amdgpu_atombios_encoder_backlight_ops = {
+ .get_brightness = amdgpu_atombios_encoder_get_backlight_brightness,
+ .update_status = amdgpu_atombios_encoder_update_backlight_status,
+};
+
+void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encoder,
+ struct drm_connector *drm_connector)
+{
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct backlight_device *bd;
+ struct backlight_properties props;
+ struct amdgpu_backlight_privdata *pdata;
+ struct amdgpu_encoder_atom_dig *dig;
+ char bl_name[16];
+
+ /* Mac laptops with multiple GPUs use the gmux driver for backlight
+ * so don't register a backlight device
+ */
+ if ((adev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
+ (adev->pdev->device == 0x6741))
+ return;
+
+ if (!amdgpu_encoder->enc_priv)
+ return;
+
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
+ goto register_acpi_backlight;
+
+ if (!acpi_video_backlight_use_native()) {
+ drm_info(dev, "Skipping amdgpu atom DIG backlight registration\n");
+ goto register_acpi_backlight;
+ }
+
+ pdata = kmalloc(sizeof(struct amdgpu_backlight_privdata), GFP_KERNEL);
+ if (!pdata) {
+ DRM_ERROR("Memory allocation failed\n");
+ goto error;
+ }
+
+ memset(&props, 0, sizeof(props));
+ props.max_brightness = AMDGPU_MAX_BL_LEVEL;
+ props.type = BACKLIGHT_RAW;
+ snprintf(bl_name, sizeof(bl_name),
+ "amdgpu_bl%d", dev->primary->index);
+ bd = backlight_device_register(bl_name, drm_connector->kdev,
+ pdata, &amdgpu_atombios_encoder_backlight_ops, &props);
+ if (IS_ERR(bd)) {
+ DRM_ERROR("Backlight registration failed\n");
+ goto error;
+ }
+
+ pdata->encoder = amdgpu_encoder;
+
+ dig = amdgpu_encoder->enc_priv;
+ dig->bl_dev = bd;
+
+ bd->props.brightness = amdgpu_atombios_encoder_get_backlight_brightness(bd);
+ bd->props.power = BACKLIGHT_POWER_ON;
+ backlight_update_status(bd);
+
+ DRM_INFO("amdgpu atom DIG backlight initialized\n");
+
+ return;
+
+error:
+ kfree(pdata);
+ return;
+
+register_acpi_backlight:
+ /* Try registering an ACPI video backlight device instead. */
+ acpi_video_register_backlight();
+}
+
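amdgpu_atombios_encoder_init_backlight() above is mostly a chain of early exits: skip entirely on the dual-GPU Apple laptop handled by gmux, require per-encoder private data and the ATOM "backlight controlled by GPU" firmware flag, defer to ACPI video unless the platform prefers a native backlight, and only then register a BACKLIGHT_RAW device. A compact sketch of that decision ladder, with the inputs collapsed into booleans; the names below are illustrative, not driver API.

#include <stdbool.h>
#include <stdio.h>

enum bl_path { BL_NONE, BL_ACPI_VIDEO, BL_NATIVE_DIG };

struct bl_inputs {
    bool apple_gmux_quirk;      /* dual-GPU Mac handled by the gmux driver */
    bool has_enc_priv;          /* encoder carries DIG private data */
    bool bl_controlled_by_gpu;  /* ATOM firmware flag */
    bool prefer_native;         /* platform prefers a native backlight device */
};

static enum bl_path pick_backlight_path(const struct bl_inputs *in)
{
    if (in->apple_gmux_quirk || !in->has_enc_priv)
        return BL_NONE;
    if (!in->bl_controlled_by_gpu || !in->prefer_native)
        return BL_ACPI_VIDEO;   /* fall back to the ACPI video backlight */
    return BL_NATIVE_DIG;       /* register the raw amdgpu_bl%d device */
}

int main(void)
{
    struct bl_inputs in = { false, true, true, true };

    printf("path=%d\n", pick_backlight_path(&in)); /* BL_NATIVE_DIG */
    return 0;
}
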
+void
+amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder)
+{
+ struct drm_device *dev = amdgpu_encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct backlight_device *bd = NULL;
+ struct amdgpu_encoder_atom_dig *dig;
+
+ if (!amdgpu_encoder->enc_priv)
+ return;
+
+ if (!(adev->mode_info.firmware_flags & ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU))
+ return;
+
+ dig = amdgpu_encoder->enc_priv;
+ bd = dig->bl_dev;
+ dig->bl_dev = NULL;
+
+ if (bd) {
+ struct amdgpu_legacy_backlight_privdata *pdata;
+
+ pdata = bl_get_data(bd);
+ backlight_device_unregister(bd);
+ kfree(pdata);
+
+ DRM_INFO("amdgpu atom LVDS backlight unloaded\n");
+ }
+}
+
+bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ /* update the encoder's active device based on the connector routing */
+ amdgpu_encoder_set_active_device(encoder);
+ drm_mode_set_crtcinfo(adjusted_mode, 0);
+
+ /* hw bug: interlaced modes need vsync_start at least 2 lines past vdisplay */
+ if ((mode->flags & DRM_MODE_FLAG_INTERLACE)
+ && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2)))
+ adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2;
+
+ /* vertical FP must be at least 1 */
+ if (mode->crtc_vsync_start == mode->crtc_vdisplay)
+ adjusted_mode->crtc_vsync_start++;
+
+ /* get the native mode for scaling */
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_panel_mode_fixup(encoder, adjusted_mode);
+ else if (amdgpu_encoder->rmx_type != RMX_OFF)
+ amdgpu_panel_mode_fixup(encoder, adjusted_mode);
+
+ if ((amdgpu_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE)) {
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ amdgpu_atombios_dp_set_link_config(connector, adjusted_mode);
+ }
+
+ return true;
+}
+
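The mode_fixup hook above nudges the CRTC timings in two places: interlaced modes get vsync_start pushed to at least vdisplay + 2, and a zero-length vertical front porch is bumped to one line. A self-contained sketch of those two adjustments on a stripped-down timing struct; the struct is illustrative and not the drm_display_mode layout.

#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-in for the handful of drm_display_mode fields used here. */
struct timings {
    int crtc_vdisplay;
    int crtc_vsync_start;
    bool interlaced;
};

static void fixup_vertical_timings(struct timings *t)
{
    /* hw workaround: interlaced scanout wants vsync_start >= vdisplay + 2 */
    if (t->interlaced && t->crtc_vsync_start < t->crtc_vdisplay + 2)
        t->crtc_vsync_start = t->crtc_vdisplay + 2;

    /* the vertical front porch must be at least one line */
    if (t->crtc_vsync_start == t->crtc_vdisplay)
        t->crtc_vsync_start++;
}

int main(void)
{
    struct timings t = { .crtc_vdisplay = 1080, .crtc_vsync_start = 1080, .interlaced = false };

    fixup_vertical_timings(&t);
    printf("vsync_start=%d\n", t.crtc_vsync_start); /* 1081 */
    return 0;
}
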
+static void
+amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ DAC_ENCODER_CONTROL_PS_ALLOCATION args;
+ int index = 0;
+
+ memset(&args, 0, sizeof(args));
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ index = GetIndexIntoMasterTable(COMMAND, DAC1EncoderControl);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ index = GetIndexIntoMasterTable(COMMAND, DAC2EncoderControl);
+ break;
+ }
+
+ args.ucAction = action;
+ args.ucDacStandard = ATOM_DAC1_PS2;
+ args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+
+}
+
+static u8 amdgpu_atombios_encoder_get_bpc(struct drm_encoder *encoder)
+{
+ int bpc = 8;
+
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ switch (bpc) {
+ case 0:
+ return PANEL_BPC_UNDEFINE;
+ case 6:
+ return PANEL_6BIT_PER_COLOR;
+ case 8:
+ default:
+ return PANEL_8BIT_PER_COLOR;
+ case 10:
+ return PANEL_10BIT_PER_COLOR;
+ case 12:
+ return PANEL_12BIT_PER_COLOR;
+ case 16:
+ return PANEL_16BIT_PER_COLOR;
+ }
+}
+
+union dvo_encoder_control {
+ ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION ext_tmds;
+ DVO_ENCODER_CONTROL_PS_ALLOCATION dvo;
+ DVO_ENCODER_CONTROL_PS_ALLOCATION_V3 dvo_v3;
+ DVO_ENCODER_CONTROL_PS_ALLOCATION_V1_4 dvo_v4;
+};
+
+static void
+amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ union dvo_encoder_control args;
+ int index = GetIndexIntoMasterTable(COMMAND, DVOEncoderControl);
+ uint8_t frev, crev;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ /* R4xx, R5xx */
+ args.ext_tmds.sXTmdsEncoder.ucEnable = action;
+
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.ext_tmds.sXTmdsEncoder.ucMisc |= PANEL_ENCODER_MISC_DUAL;
+
+ args.ext_tmds.sXTmdsEncoder.ucMisc |= ATOM_PANEL_MISC_888RGB;
+ break;
+ case 2:
+ /* RS600/690/740 */
+ args.dvo.sDVOEncoder.ucAction = action;
+ args.dvo.sDVOEncoder.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ /* DFP1, CRT1, TV1 depending on the type of port */
+ args.dvo.sDVOEncoder.ucDeviceType = ATOM_DEVICE_DFP1_INDEX;
+
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.dvo.sDVOEncoder.usDevAttr.sDigAttrib.ucAttribute |= PANEL_ENCODER_MISC_DUAL;
+ break;
+ case 3:
+ /* R6xx */
+ args.dvo_v3.ucAction = action;
+ args.dvo_v3.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ args.dvo_v3.ucDVOConfig = 0; /* XXX */
+ break;
+ case 4:
+ /* DCE8 */
+ args.dvo_v4.ucAction = action;
+ args.dvo_v4.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ args.dvo_v4.ucDVOConfig = 0; /* XXX */
+ args.dvo_v4.ucBitPerColor = amdgpu_atombios_encoder_get_bpc(encoder);
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector;
+ struct amdgpu_connector *amdgpu_connector;
+ struct amdgpu_connector_atom_dig *dig_connector;
+
+ /* dp bridges are always DP */
+ if (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE)
+ return ATOM_ENCODER_MODE_DP;
+
+ /* DVO is always DVO */
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DVO1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1))
+ return ATOM_ENCODER_MODE_DVO;
+
+ connector = amdgpu_get_connector_for_encoder(encoder);
+ /* if we don't have an active device yet, just use one of
+ * the connectors tied to the encoder.
+ */
+ if (!connector)
+ connector = amdgpu_get_connector_for_encoder_init(encoder);
+ amdgpu_connector = to_amdgpu_connector(connector);
+
+ switch (connector->connector_type) {
+ case DRM_MODE_CONNECTOR_DVII:
+ case DRM_MODE_CONNECTOR_HDMIB: /* HDMI-B is basically DL-DVI; analog works fine */
+ if (amdgpu_audio != 0) {
+ if (amdgpu_connector->use_digital &&
+ (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE))
+ return ATOM_ENCODER_MODE_HDMI;
+ else if (connector->display_info.is_hdmi &&
+ (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
+ return ATOM_ENCODER_MODE_HDMI;
+ else if (amdgpu_connector->use_digital)
+ return ATOM_ENCODER_MODE_DVI;
+ else
+ return ATOM_ENCODER_MODE_CRT;
+ } else if (amdgpu_connector->use_digital) {
+ return ATOM_ENCODER_MODE_DVI;
+ } else {
+ return ATOM_ENCODER_MODE_CRT;
+ }
+ break;
+ case DRM_MODE_CONNECTOR_DVID:
+ case DRM_MODE_CONNECTOR_HDMIA:
+ default:
+ if (amdgpu_audio != 0) {
+ if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)
+ return ATOM_ENCODER_MODE_HDMI;
+ else if (connector->display_info.is_hdmi &&
+ (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
+ return ATOM_ENCODER_MODE_HDMI;
+ else
+ return ATOM_ENCODER_MODE_DVI;
+ } else {
+ return ATOM_ENCODER_MODE_DVI;
+ }
+ case DRM_MODE_CONNECTOR_LVDS:
+ return ATOM_ENCODER_MODE_LVDS;
+ case DRM_MODE_CONNECTOR_DisplayPort:
+ dig_connector = amdgpu_connector->con_priv;
+ if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+ (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
+ return ATOM_ENCODER_MODE_DP;
+ } else if (amdgpu_audio != 0) {
+ if (amdgpu_connector->audio == AMDGPU_AUDIO_ENABLE)
+ return ATOM_ENCODER_MODE_HDMI;
+ else if (connector->display_info.is_hdmi &&
+ (amdgpu_connector->audio == AMDGPU_AUDIO_AUTO))
+ return ATOM_ENCODER_MODE_HDMI;
+ else
+ return ATOM_ENCODER_MODE_DVI;
+ } else {
+ return ATOM_ENCODER_MODE_DVI;
+ }
+ case DRM_MODE_CONNECTOR_eDP:
+ return ATOM_ENCODER_MODE_DP;
+ case DRM_MODE_CONNECTOR_DVIA:
+ case DRM_MODE_CONNECTOR_VGA:
+ return ATOM_ENCODER_MODE_CRT;
+ case DRM_MODE_CONNECTOR_Composite:
+ case DRM_MODE_CONNECTOR_SVIDEO:
+ case DRM_MODE_CONNECTOR_9PinDIN:
+ /* fix me */
+ return ATOM_ENCODER_MODE_TV;
+ }
+}
+
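For a DisplayPort connector, the function above picks the encoder mode from the detected sink: a DP or eDP sink type forces ATOM_ENCODER_MODE_DP, otherwise an HDMI-capable sink with audio enabled (or on auto) gets HDMI, and everything else falls back to DVI. A small sketch of just that branch, with enums standing in for the driver's constants.

#include <stdbool.h>
#include <stdio.h>

enum enc_mode { MODE_DVI, MODE_HDMI, MODE_DP };
enum audio_opt { AUDIO_DISABLE, AUDIO_ENABLE, AUDIO_AUTO };

/* DP connector branch of the mode selection, simplified. */
static enum enc_mode dp_connector_mode(bool sink_is_dp, bool sink_is_hdmi,
                                       bool audio_param_on, enum audio_opt audio)
{
    if (sink_is_dp)
        return MODE_DP;
    if (audio_param_on) {
        if (audio == AUDIO_ENABLE)
            return MODE_HDMI;
        if (sink_is_hdmi && audio == AUDIO_AUTO)
            return MODE_HDMI;
    }
    return MODE_DVI;
}

int main(void)
{
    printf("%d\n", dp_connector_mode(false, true, true, AUDIO_AUTO)); /* MODE_HDMI */
    return 0;
}
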
+/*
+ * DIG Encoder/Transmitter Setup
+ *
+ * DCE 6.0
+ * - 3 DIG transmitter blocks UNIPHY0/1/2 (links A and B).
+ * Supports up to 6 digital outputs
+ * - 6 DIG encoder blocks.
+ * - DIG to PHY mapping is hardcoded
+ * DIG1 drives UNIPHY0 link A, A+B
+ * DIG2 drives UNIPHY0 link B
+ * DIG3 drives UNIPHY1 link A, A+B
+ * DIG4 drives UNIPHY1 link B
+ * DIG5 drives UNIPHY2 link A, A+B
+ * DIG6 drives UNIPHY2 link B
+ *
+ * Routing
+ * crtc -> dig encoder -> UNIPHY/LVTMA (1 or 2 links)
+ * Examples:
+ * crtc0 -> dig2 -> LVTMA links A+B -> TMDS/HDMI
+ * crtc1 -> dig1 -> UNIPHY0 link B -> DP
+ * crtc0 -> dig1 -> UNIPHY2 link A -> LVDS
+ * crtc1 -> dig2 -> UNIPHY1 link B+A -> TMDS/HDMI
+ */
+
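The comment above fixes the DIG-to-PHY routing for DCE 6.0: each UNIPHY block owns a consecutive pair of DIG encoders, with the even index driving link A (or A+B for dual link) and the odd index driving link B. A sketch that reproduces that hardcoded table, using 0-based indices 0-5 for DIG1-DIG6; this is purely illustrative, not driver code.

#include <stdio.h>

struct phy_route {
    int uniphy;  /* 0..2 -> UNIPHY0/1/2 */
    char link;   /* 'A' or 'B' (link A also carries A+B dual link) */
};

/* Hardcoded DCE 6.0 mapping from the table above: DIG(n) -> UNIPHY(n/2), link A/B. */
static struct phy_route dig_to_phy(int dig_index /* 0..5 for DIG1..DIG6 */)
{
    struct phy_route r;

    r.uniphy = dig_index / 2;
    r.link = (dig_index & 1) ? 'B' : 'A';
    return r;
}

int main(void)
{
    for (int d = 0; d < 6; d++) {
        struct phy_route r = dig_to_phy(d);
        printf("DIG%d -> UNIPHY%d link %c\n", d + 1, r.uniphy, r.link);
    }
    return 0;
}
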
+union dig_encoder_control {
+ DIG_ENCODER_CONTROL_PS_ALLOCATION v1;
+ DIG_ENCODER_CONTROL_PARAMETERS_V2 v2;
+ DIG_ENCODER_CONTROL_PARAMETERS_V3 v3;
+ DIG_ENCODER_CONTROL_PARAMETERS_V4 v4;
+ DIG_ENCODER_CONTROL_PARAMETERS_V5 v5;
+};
+
+void
+amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
+ int action, int panel_mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ union dig_encoder_control args;
+ int index = GetIndexIntoMasterTable(COMMAND, DIGxEncoderControl);
+ uint8_t frev, crev;
+ int dp_clock = 0;
+ int dp_lane_count = 0;
+ int hpd_id = AMDGPU_HPD_NONE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ dp_clock = dig_connector->dp_clock;
+ dp_lane_count = dig_connector->dp_lane_count;
+ hpd_id = amdgpu_connector->hpd.hpd;
+ }
+
+ /* no dig encoder assigned */
+ if (dig->dig_encoder == -1)
+ return;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ args.v1.ucAction = action;
+ args.v1.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ if (action == ATOM_ENCODER_CMD_SETUP_PANEL_MODE)
+ args.v3.ucPanelMode = panel_mode;
+ else
+ args.v1.ucEncoderMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode))
+ args.v1.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v1.ucLaneNum = 8;
+ else
+ args.v1.ucLaneNum = 4;
+
+ if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000))
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER2;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER3;
+ break;
+ }
+ if (dig->linkb)
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKB;
+ else
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
+ break;
+ case 2:
+ case 3:
+ args.v3.ucAction = action;
+ args.v3.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ if (action == ATOM_ENCODER_CMD_SETUP_PANEL_MODE)
+ args.v3.ucPanelMode = panel_mode;
+ else
+ args.v3.ucEncoderMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v3.ucEncoderMode))
+ args.v3.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.ucLaneNum = 8;
+ else
+ args.v3.ucLaneNum = 4;
+
+ if (ENCODER_MODE_IS_DP(args.v3.ucEncoderMode) && (dp_clock == 270000))
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V3_DPLINKRATE_2_70GHZ;
+ args.v3.acConfig.ucDigSel = dig->dig_encoder;
+ args.v3.ucBitPerColor = amdgpu_atombios_encoder_get_bpc(encoder);
+ break;
+ case 4:
+ args.v4.ucAction = action;
+ args.v4.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ if (action == ATOM_ENCODER_CMD_SETUP_PANEL_MODE)
+ args.v4.ucPanelMode = panel_mode;
+ else
+ args.v4.ucEncoderMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v4.ucEncoderMode))
+ args.v4.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v4.ucLaneNum = 8;
+ else
+ args.v4.ucLaneNum = 4;
+
+ if (ENCODER_MODE_IS_DP(args.v4.ucEncoderMode)) {
+ if (dp_clock == 540000)
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_5_40GHZ;
+ else if (dp_clock == 324000)
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_3_24GHZ;
+ else if (dp_clock == 270000)
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_2_70GHZ;
+ else
+ args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_1_62GHZ;
+ }
+ args.v4.acConfig.ucDigSel = dig->dig_encoder;
+ args.v4.ucBitPerColor = amdgpu_atombios_encoder_get_bpc(encoder);
+ if (hpd_id == AMDGPU_HPD_NONE)
+ args.v4.ucHPD_ID = 0;
+ else
+ args.v4.ucHPD_ID = hpd_id + 1;
+ break;
+ case 5:
+ switch (action) {
+ case ATOM_ENCODER_CMD_SETUP_PANEL_MODE:
+ args.v5.asDPPanelModeParam.ucAction = action;
+ args.v5.asDPPanelModeParam.ucPanelMode = panel_mode;
+ args.v5.asDPPanelModeParam.ucDigId = dig->dig_encoder;
+ break;
+ case ATOM_ENCODER_CMD_STREAM_SETUP:
+ args.v5.asStreamParam.ucAction = action;
+ args.v5.asStreamParam.ucDigId = dig->dig_encoder;
+ args.v5.asStreamParam.ucDigMode =
+ amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ if (ENCODER_MODE_IS_DP(args.v5.asStreamParam.ucDigMode))
+ args.v5.asStreamParam.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder,
+ amdgpu_encoder->pixel_clock))
+ args.v5.asStreamParam.ucLaneNum = 8;
+ else
+ args.v5.asStreamParam.ucLaneNum = 4;
+ args.v5.asStreamParam.ulPixelClock =
+ cpu_to_le32(amdgpu_encoder->pixel_clock / 10);
+ args.v5.asStreamParam.ucBitPerColor =
+ amdgpu_atombios_encoder_get_bpc(encoder);
+ args.v5.asStreamParam.ucLinkRateIn270Mhz = dp_clock / 27000;
+ break;
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_START:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN3:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN4:
+ case ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE:
+ case ATOM_ENCODER_CMD_DP_VIDEO_OFF:
+ case ATOM_ENCODER_CMD_DP_VIDEO_ON:
+ args.v5.asCmdParam.ucAction = action;
+ args.v5.asCmdParam.ucDigId = dig->dig_encoder;
+ break;
+ default:
+ DRM_ERROR("Unsupported action 0x%x\n", action);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+
+}
+
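Across every table revision handled above, the same two decisions repeat: the lane count is the DP lane count for DP modes, 8 for dual-link TMDS, and 4 otherwise, and on the v4 table the DP link clock (in kHz) selects one of the DPLINKRATE config flags. A condensed sketch of both decisions; the enum values are placeholders for the ATOM_ENCODER_CONFIG_V4_* flags.

#include <stdbool.h>
#include <stdio.h>

enum dp_linkrate_cfg { RATE_1_62GHZ, RATE_2_70GHZ, RATE_3_24GHZ, RATE_5_40GHZ };

static int pick_lane_count(bool is_dp, int dp_lane_count, bool dual_link)
{
    if (is_dp)
        return dp_lane_count;
    return dual_link ? 8 : 4;
}

/* v4-style mapping of the DP link clock (kHz) onto a link-rate config value. */
static enum dp_linkrate_cfg pick_dp_linkrate(int dp_clock_khz)
{
    if (dp_clock_khz == 540000)
        return RATE_5_40GHZ;
    if (dp_clock_khz == 324000)
        return RATE_3_24GHZ;
    if (dp_clock_khz == 270000)
        return RATE_2_70GHZ;
    return RATE_1_62GHZ;
}

int main(void)
{
    printf("lanes=%d rate=%d\n",
           pick_lane_count(true, 4, false), pick_dp_linkrate(270000));
    return 0;
}
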
+union dig_transmitter_control {
+ DIG_TRANSMITTER_CONTROL_PS_ALLOCATION v1;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 v2;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 v4;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 v5;
+ DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6 v6;
+};
+
+void
+amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int action,
+ uint8_t lane_num, uint8_t lane_set)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ union dig_transmitter_control args;
+ int index = 0;
+ uint8_t frev, crev;
+ bool is_dp = false;
+ int pll_id = 0;
+ int dp_clock = 0;
+ int dp_lane_count = 0;
+ int connector_object_id = 0;
+ int dig_encoder = dig->dig_encoder;
+ int hpd_id = AMDGPU_HPD_NONE;
+
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ connector = amdgpu_get_connector_for_encoder_init(encoder);
+ /* dig_encoder is only forced to 0 to avoid bailing in the
+ * "no dig encoder assigned" check below; the encoder index
+ * isn't actually used for the INIT action
+ */
+ dig_encoder = 0;
+ } else
+ connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ hpd_id = amdgpu_connector->hpd.hpd;
+ dp_clock = dig_connector->dp_clock;
+ dp_lane_count = dig_connector->dp_lane_count;
+ connector_object_id =
+ (amdgpu_connector->connector_object_id & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ }
+
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ pll_id = amdgpu_crtc->pll_id;
+ }
+
+ /* no dig encoder assigned */
+ if (dig_encoder == -1)
+ return;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)))
+ is_dp = true;
+
+ memset(&args, 0, sizeof(args));
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ index = GetIndexIntoMasterTable(COMMAND, DVOOutputControl);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ index = GetIndexIntoMasterTable(COMMAND, LVTMATransmitterControl);
+ break;
+ }
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ args.v1.ucAction = action;
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ args.v1.usInitInfo = cpu_to_le16(connector_object_id);
+ } else if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) {
+ args.v1.asMode.ucLaneSel = lane_num;
+ args.v1.asMode.ucLaneSet = lane_set;
+ } else {
+ if (is_dp)
+ args.v1.usPixelClock = cpu_to_le16(dp_clock / 10);
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v1.usPixelClock = cpu_to_le16((amdgpu_encoder->pixel_clock / 2) / 10);
+ else
+ args.v1.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ }
+
+ args.v1.ucConfig = ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL;
+
+ if (dig_encoder)
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER;
+ else
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
+
+ if (dig->linkb)
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB;
+ else
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKA;
+
+ if (is_dp)
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_COHERENT;
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_COHERENT;
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_8LANE_LINK;
+ }
+ break;
+ case 2:
+ args.v2.ucAction = action;
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ args.v2.usInitInfo = cpu_to_le16(connector_object_id);
+ } else if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) {
+ args.v2.asMode.ucLaneSel = lane_num;
+ args.v2.asMode.ucLaneSet = lane_set;
+ } else {
+ if (is_dp)
+ args.v2.usPixelClock = cpu_to_le16(dp_clock / 10);
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v2.usPixelClock = cpu_to_le16((amdgpu_encoder->pixel_clock / 2) / 10);
+ else
+ args.v2.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ }
+
+ args.v2.acConfig.ucEncoderSel = dig_encoder;
+ if (dig->linkb)
+ args.v2.acConfig.ucLinkSel = 1;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ args.v2.acConfig.ucTransmitterSel = 0;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ args.v2.acConfig.ucTransmitterSel = 1;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ args.v2.acConfig.ucTransmitterSel = 2;
+ break;
+ }
+
+ if (is_dp) {
+ args.v2.acConfig.fCoherentMode = 1;
+ args.v2.acConfig.fDPConnector = 1;
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v2.acConfig.fCoherentMode = 1;
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v2.acConfig.fDualLinkConnector = 1;
+ }
+ break;
+ case 3:
+ args.v3.ucAction = action;
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ args.v3.usInitInfo = cpu_to_le16(connector_object_id);
+ } else if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) {
+ args.v3.asMode.ucLaneSel = lane_num;
+ args.v3.asMode.ucLaneSet = lane_set;
+ } else {
+ if (is_dp)
+ args.v3.usPixelClock = cpu_to_le16(dp_clock / 10);
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.usPixelClock = cpu_to_le16((amdgpu_encoder->pixel_clock / 2) / 10);
+ else
+ args.v3.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ }
+
+ if (is_dp)
+ args.v3.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.ucLaneNum = 8;
+ else
+ args.v3.ucLaneNum = 4;
+
+ if (dig->linkb)
+ args.v3.acConfig.ucLinkSel = 1;
+ if (dig_encoder & 1)
+ args.v3.acConfig.ucEncoderSel = 1;
+
+ /* Select the PLL for the PHY
+ * DP PHY should be clocked from external src if there is
+ * one.
+ */
+ /* On DCE4, if there is an external clock, it generates the DP ref clock */
+ if (is_dp && adev->clock.dp_extclk)
+ args.v3.acConfig.ucRefClkSource = 2; /* external src */
+ else
+ args.v3.acConfig.ucRefClkSource = pll_id;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ args.v3.acConfig.ucTransmitterSel = 0;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ args.v3.acConfig.ucTransmitterSel = 1;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ args.v3.acConfig.ucTransmitterSel = 2;
+ break;
+ }
+
+ if (is_dp)
+ args.v3.acConfig.fCoherentMode = 1; /* DP requires coherent */
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v3.acConfig.fCoherentMode = 1;
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.acConfig.fDualLinkConnector = 1;
+ }
+ break;
+ case 4:
+ args.v4.ucAction = action;
+ if (action == ATOM_TRANSMITTER_ACTION_INIT) {
+ args.v4.usInitInfo = cpu_to_le16(connector_object_id);
+ } else if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) {
+ args.v4.asMode.ucLaneSel = lane_num;
+ args.v4.asMode.ucLaneSet = lane_set;
+ } else {
+ if (is_dp)
+ args.v4.usPixelClock = cpu_to_le16(dp_clock / 10);
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v4.usPixelClock = cpu_to_le16((amdgpu_encoder->pixel_clock / 2) / 10);
+ else
+ args.v4.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ }
+
+ if (is_dp)
+ args.v4.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v4.ucLaneNum = 8;
+ else
+ args.v4.ucLaneNum = 4;
+
+ if (dig->linkb)
+ args.v4.acConfig.ucLinkSel = 1;
+ if (dig_encoder & 1)
+ args.v4.acConfig.ucEncoderSel = 1;
+
+ /* Select the PLL for the PHY
+ * DP PHY should be clocked from external src if there is
+ * one.
+ */
+ /* On DCE5 DCPLL usually generates the DP ref clock */
+ if (is_dp) {
+ if (adev->clock.dp_extclk)
+ args.v4.acConfig.ucRefClkSource = ENCODER_REFCLK_SRC_EXTCLK;
+ else
+ args.v4.acConfig.ucRefClkSource = ENCODER_REFCLK_SRC_DCPLL;
+ } else
+ args.v4.acConfig.ucRefClkSource = pll_id;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ args.v4.acConfig.ucTransmitterSel = 0;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ args.v4.acConfig.ucTransmitterSel = 1;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ args.v4.acConfig.ucTransmitterSel = 2;
+ break;
+ }
+
+ if (is_dp)
+ args.v4.acConfig.fCoherentMode = 1; /* DP requires coherent */
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v4.acConfig.fCoherentMode = 1;
+ if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v4.acConfig.fDualLinkConnector = 1;
+ }
+ break;
+ case 5:
+ args.v5.ucAction = action;
+ if (is_dp)
+ args.v5.usSymClock = cpu_to_le16(dp_clock / 10);
+ else
+ args.v5.usSymClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYB;
+ else
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYA;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYD;
+ else
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYC;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYF;
+ else
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYE;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ args.v5.ucPhyId = ATOM_PHY_ID_UNIPHYG;
+ break;
+ }
+ if (is_dp)
+ args.v5.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v5.ucLaneNum = 8;
+ else
+ args.v5.ucLaneNum = 4;
+ args.v5.ucConnObjId = connector_object_id;
+ args.v5.ucDigMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (is_dp && adev->clock.dp_extclk)
+ args.v5.asConfig.ucPhyClkSrcId = ENCODER_REFCLK_SRC_EXTCLK;
+ else
+ args.v5.asConfig.ucPhyClkSrcId = pll_id;
+
+ if (is_dp)
+ args.v5.asConfig.ucCoherentMode = 1; /* DP requires coherent */
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+ if (dig->coherent_mode)
+ args.v5.asConfig.ucCoherentMode = 1;
+ }
+ if (hpd_id == AMDGPU_HPD_NONE)
+ args.v5.asConfig.ucHPDSel = 0;
+ else
+ args.v5.asConfig.ucHPDSel = hpd_id + 1;
+ args.v5.ucDigEncoderSel = 1 << dig_encoder;
+ args.v5.ucDPLaneSet = lane_set;
+ break;
+ case 6:
+ args.v6.ucAction = action;
+ if (is_dp)
+ args.v6.ulSymClock = cpu_to_le32(dp_clock / 10);
+ else
+ args.v6.ulSymClock = cpu_to_le32(amdgpu_encoder->pixel_clock / 10);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYB;
+ else
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYA;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYD;
+ else
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYC;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYF;
+ else
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYE;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYG;
+ break;
+ }
+ if (is_dp)
+ args.v6.ucLaneNum = dp_lane_count;
+ else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v6.ucLaneNum = 8;
+ else
+ args.v6.ucLaneNum = 4;
+ args.v6.ucConnObjId = connector_object_id;
+ if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH)
+ args.v6.ucDPLaneSet = lane_set;
+ else
+ args.v6.ucDigMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (hpd_id == AMDGPU_HPD_NONE)
+ args.v6.ucHPDSel = 0;
+ else
+ args.v6.ucHPDSel = hpd_id + 1;
+ args.v6.ucDigEncoderSel = 1 << dig_encoder;
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+ break;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
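The transmitter setup above also picks the PHY reference clock source, as spelled out in its in-function comments: DP links use the external reference clock when the board provides one, otherwise (on the v4 table) the dedicated DCPLL, while non-DP links reuse the pixel PLL driving the CRTC. A sketch of that v4-style choice; the identifiers are illustrative.

#include <stdbool.h>
#include <stdio.h>

enum refclk_src { REFCLK_PIXEL_PLL0, REFCLK_PIXEL_PLL1, REFCLK_DCPLL, REFCLK_EXTCLK };

static enum refclk_src pick_phy_refclk(bool is_dp, bool have_ext_dp_clk,
                                       enum refclk_src crtc_pll)
{
    if (is_dp)
        return have_ext_dp_clk ? REFCLK_EXTCLK : REFCLK_DCPLL;
    return crtc_pll; /* non-DP: reuse the pixel PLL feeding this CRTC */
}

int main(void)
{
    printf("%d\n", pick_phy_refclk(true, false, REFCLK_PIXEL_PLL0)); /* REFCLK_DCPLL */
    return 0;
}
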
+bool
+amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector,
+ int action)
+{
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_device *dev = amdgpu_connector->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union dig_transmitter_control args;
+ int index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl);
+ uint8_t frev, crev;
+
+ if (connector->connector_type != DRM_MODE_CONNECTOR_eDP)
+ goto done;
+
+ if ((action != ATOM_TRANSMITTER_ACTION_POWER_ON) &&
+ (action != ATOM_TRANSMITTER_ACTION_POWER_OFF))
+ goto done;
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ goto done;
+
+ memset(&args, 0, sizeof(args));
+
+ args.v1.ucAction = action;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+
+ /* wait for the panel to power up */
+ if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) {
+ int i;
+
+ for (i = 0; i < 300; i++) {
+ if (amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd))
+ return true;
+ mdelay(1);
+ }
+ return false;
+ }
+done:
+ return true;
+}
+
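After asking the ATOM table to power the eDP panel on, the helper above polls the hot-plug sense line for roughly 300 ms (300 iterations with a 1 ms delay) before giving up. The same bounded-poll pattern in plain C; the HPD probe and the millisecond sleep are passed in as callbacks, since the kernel's mdelay() and HPD helpers are assumed here rather than reimplemented.

#include <stdbool.h>
#include <stdio.h>

/* Poll sense() once per sleep_ms(1) until it reports true or the budget runs out. */
static bool wait_for_panel(bool (*sense)(void *), void (*sleep_ms)(unsigned int),
                           void *ctx, unsigned int budget_ms)
{
    for (unsigned int i = 0; i < budget_ms; i++) {
        if (sense(ctx))
            return true;
        sleep_ms(1);
    }
    return false;
}

/* Trivial stand-ins so the sketch runs on its own. */
static bool fake_sense(void *ctx) { return (*(int *)ctx)-- <= 0; }
static void fake_sleep(unsigned int ms) { (void)ms; }

int main(void)
{
    int ticks_until_ready = 5;

    printf("powered up: %d\n",
           wait_for_panel(fake_sense, fake_sleep, &ticks_until_ready, 300));
    return 0;
}
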
+union external_encoder_control {
+ EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION v1;
+ EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION_V3 v3;
+};
+
+static void
+amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder,
+ struct drm_encoder *ext_encoder,
+ int action)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder *ext_amdgpu_encoder = to_amdgpu_encoder(ext_encoder);
+ union external_encoder_control args;
+ struct drm_connector *connector;
+ int index = GetIndexIntoMasterTable(COMMAND, ExternalEncoderControl);
+ u8 frev, crev;
+ int dp_clock = 0;
+ int dp_lane_count = 0;
+ int connector_object_id = 0;
+ u32 ext_enum = (ext_amdgpu_encoder->encoder_enum & ENUM_ID_MASK) >> ENUM_ID_SHIFT;
+
+ if (action == EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT)
+ connector = amdgpu_get_connector_for_encoder_init(encoder);
+ else
+ connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct amdgpu_connector_atom_dig *dig_connector =
+ amdgpu_connector->con_priv;
+
+ dp_clock = dig_connector->dp_clock;
+ dp_lane_count = dig_connector->dp_lane_count;
+ connector_object_id =
+ (amdgpu_connector->connector_object_id & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ }
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ /* no params on frev 1 */
+ break;
+ case 2:
+ switch (crev) {
+ case 1:
+ case 2:
+ args.v1.sDigEncoder.ucAction = action;
+ args.v1.sDigEncoder.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ args.v1.sDigEncoder.ucEncoderMode =
+ amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v1.sDigEncoder.ucEncoderMode)) {
+ if (dp_clock == 270000)
+ args.v1.sDigEncoder.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+ args.v1.sDigEncoder.ucLaneNum = dp_lane_count;
+ } else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v1.sDigEncoder.ucLaneNum = 8;
+ else
+ args.v1.sDigEncoder.ucLaneNum = 4;
+ break;
+ case 3:
+ args.v3.sExtEncoder.ucAction = action;
+ if (action == EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT)
+ args.v3.sExtEncoder.usConnectorId = cpu_to_le16(connector_object_id);
+ else
+ args.v3.sExtEncoder.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10);
+ args.v3.sExtEncoder.ucEncoderMode =
+ amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ if (ENCODER_MODE_IS_DP(args.v3.sExtEncoder.ucEncoderMode)) {
+ if (dp_clock == 270000)
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_DPLINKRATE_2_70GHZ;
+ else if (dp_clock == 540000)
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_DPLINKRATE_5_40GHZ;
+ args.v3.sExtEncoder.ucLaneNum = dp_lane_count;
+ } else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock))
+ args.v3.sExtEncoder.ucLaneNum = 8;
+ else
+ args.v3.sExtEncoder.ucLaneNum = 4;
+ switch (ext_enum) {
+ case GRAPH_OBJECT_ENUM_ID1:
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_ENCODER1;
+ break;
+ case GRAPH_OBJECT_ENUM_ID2:
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_ENCODER2;
+ break;
+ case GRAPH_OBJECT_ENUM_ID3:
+ args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_ENCODER3;
+ break;
+ }
+ args.v3.sExtEncoder.ucBitPerColor = amdgpu_atombios_encoder_get_bpc(encoder);
+ break;
+ default:
+ DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+ return;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+ return;
+ }
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+static void
+amdgpu_atombios_encoder_setup_dig(struct drm_encoder *encoder, int action)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_encoder *ext_encoder = amdgpu_get_external_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct amdgpu_connector_atom_dig *amdgpu_dig_connector = NULL;
+
+ if (connector) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ amdgpu_dig_connector = amdgpu_connector->con_priv;
+ }
+
+ if (action == ATOM_ENABLE) {
+ if (!connector)
+ dig->panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+ else
+ dig->panel_mode = amdgpu_atombios_dp_get_panel_mode(encoder, connector);
+
+ /* setup and enable the encoder */
+ amdgpu_atombios_encoder_setup_dig_encoder(encoder, ATOM_ENCODER_CMD_SETUP, 0);
+ amdgpu_atombios_encoder_setup_dig_encoder(encoder,
+ ATOM_ENCODER_CMD_SETUP_PANEL_MODE,
+ dig->panel_mode);
+ if (ext_encoder)
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder,
+ EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP);
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ amdgpu_dig_connector->edp_on = true;
+ }
+ }
+ /* enable the transmitter */
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_ENABLE,
+ 0, 0);
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector) {
+ /* DP_SET_POWER_D0 is set in amdgpu_atombios_dp_link_train */
+ amdgpu_atombios_dp_link_train(encoder, connector);
+ amdgpu_atombios_encoder_setup_dig_encoder(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0);
+ }
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_set_backlight_level(amdgpu_encoder, dig->backlight_level);
+ if (ext_encoder)
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder, ATOM_ENABLE);
+ } else {
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector)
+ amdgpu_atombios_encoder_setup_dig_encoder(encoder,
+ ATOM_ENCODER_CMD_DP_VIDEO_OFF, 0);
+ if (ext_encoder)
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder, ATOM_DISABLE);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector)
+ amdgpu_atombios_dp_set_rx_power_state(connector, DP_SET_POWER_D3);
+ /* disable the transmitter */
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
+ ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(encoder)) &&
+ connector) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_OFF);
+ amdgpu_dig_connector->edp_on = false;
+ }
+ }
+ }
+}
+
+void
+amdgpu_atombios_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ DRM_DEBUG_KMS("encoder dpms %d to mode %d, devices %08x, active_devices %08x\n",
+ amdgpu_encoder->encoder_id, mode, amdgpu_encoder->devices,
+ amdgpu_encoder->active_device);
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_atombios_encoder_setup_dig(encoder, ATOM_ENABLE);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ amdgpu_atombios_encoder_setup_dig(encoder, ATOM_DISABLE);
+ break;
+ }
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_atombios_encoder_setup_dvo(encoder, ATOM_ENABLE);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ amdgpu_atombios_encoder_setup_dvo(encoder, ATOM_DISABLE);
+ break;
+ }
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_atombios_encoder_setup_dac(encoder, ATOM_ENABLE);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ amdgpu_atombios_encoder_setup_dac(encoder, ATOM_DISABLE);
+ break;
+ }
+ break;
+ default:
+ return;
+ }
+}
+
+union crtc_source_param {
+ SELECT_CRTC_SOURCE_PS_ALLOCATION v1;
+ SELECT_CRTC_SOURCE_PARAMETERS_V2 v2;
+ SELECT_CRTC_SOURCE_PARAMETERS_V3 v3;
+};
+
+void
+amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ union crtc_source_param args;
+ int index = GetIndexIntoMasterTable(COMMAND, SelectCRTC_Source);
+ uint8_t frev, crev;
+ struct amdgpu_encoder_atom_dig *dig;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return;
+
+ switch (frev) {
+ case 1:
+ switch (crev) {
+ case 1:
+ default:
+ args.v1.ucCRTC = amdgpu_crtc->crtc_id;
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1:
+ args.v1.ucDevice = ATOM_DEVICE_DFP1_INDEX;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_LVDS:
+ case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT)
+ args.v1.ucDevice = ATOM_DEVICE_LCD1_INDEX;
+ else
+ args.v1.ucDevice = ATOM_DEVICE_DFP3_INDEX;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_DDI:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ args.v1.ucDevice = ATOM_DEVICE_DFP2_INDEX;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v1.ucDevice = ATOM_DEVICE_TV1_INDEX;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v1.ucDevice = ATOM_DEVICE_CV_INDEX;
+ else
+ args.v1.ucDevice = ATOM_DEVICE_CRT1_INDEX;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_DAC2:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v1.ucDevice = ATOM_DEVICE_TV1_INDEX;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v1.ucDevice = ATOM_DEVICE_CV_INDEX;
+ else
+ args.v1.ucDevice = ATOM_DEVICE_CRT2_INDEX;
+ break;
+ }
+ break;
+ case 2:
+ args.v2.ucCRTC = amdgpu_crtc->crtc_id;
+ if (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE) {
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+ else if (connector->connector_type == DRM_MODE_CONNECTOR_VGA)
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_CRT;
+ else
+ args.v2.ucEncodeMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+ } else {
+ args.v2.ucEncodeMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ }
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ dig = amdgpu_encoder->enc_priv;
+ switch (dig->dig_encoder) {
+ case 0:
+ args.v2.ucEncoderID = ASIC_INT_DIG1_ENCODER_ID;
+ break;
+ case 1:
+ args.v2.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID;
+ break;
+ case 2:
+ args.v2.ucEncoderID = ASIC_INT_DIG3_ENCODER_ID;
+ break;
+ case 3:
+ args.v2.ucEncoderID = ASIC_INT_DIG4_ENCODER_ID;
+ break;
+ case 4:
+ args.v2.ucEncoderID = ASIC_INT_DIG5_ENCODER_ID;
+ break;
+ case 5:
+ args.v2.ucEncoderID = ASIC_INT_DIG6_ENCODER_ID;
+ break;
+ case 6:
+ args.v2.ucEncoderID = ASIC_INT_DIG7_ENCODER_ID;
+ break;
+ }
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ args.v2.ucEncoderID = ASIC_INT_DVO_ENCODER_ID;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else
+ args.v2.ucEncoderID = ASIC_INT_DAC1_ENCODER_ID;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v2.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else
+ args.v2.ucEncoderID = ASIC_INT_DAC2_ENCODER_ID;
+ break;
+ }
+ break;
+ case 3:
+ args.v3.ucCRTC = amdgpu_crtc->crtc_id;
+ if (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE) {
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+ else if (connector->connector_type == DRM_MODE_CONNECTOR_VGA)
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_CRT;
+ else
+ args.v2.ucEncodeMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS;
+ } else {
+ args.v2.ucEncodeMode = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ }
+ args.v3.ucDstBpc = amdgpu_atombios_encoder_get_bpc(encoder);
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+ dig = amdgpu_encoder->enc_priv;
+ switch (dig->dig_encoder) {
+ case 0:
+ args.v3.ucEncoderID = ASIC_INT_DIG1_ENCODER_ID;
+ break;
+ case 1:
+ args.v3.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID;
+ break;
+ case 2:
+ args.v3.ucEncoderID = ASIC_INT_DIG3_ENCODER_ID;
+ break;
+ case 3:
+ args.v3.ucEncoderID = ASIC_INT_DIG4_ENCODER_ID;
+ break;
+ case 4:
+ args.v3.ucEncoderID = ASIC_INT_DIG5_ENCODER_ID;
+ break;
+ case 5:
+ args.v3.ucEncoderID = ASIC_INT_DIG6_ENCODER_ID;
+ break;
+ case 6:
+ args.v3.ucEncoderID = ASIC_INT_DIG7_ENCODER_ID;
+ break;
+ }
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ args.v3.ucEncoderID = ASIC_INT_DVO_ENCODER_ID;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v3.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v3.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else
+ args.v3.ucEncoderID = ASIC_INT_DAC1_ENCODER_ID;
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ if (amdgpu_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))
+ args.v3.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else if (amdgpu_encoder->active_device & (ATOM_DEVICE_CV_SUPPORT))
+ args.v3.ucEncoderID = ASIC_INT_TV_ENCODER_ID;
+ else
+ args.v3.ucEncoderID = ASIC_INT_DAC2_ENCODER_ID;
+ break;
+ }
+ break;
+ }
+ break;
+ default:
+ DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
+ return;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
+
+/* This only needs to be called once at startup */
+void
+amdgpu_atombios_encoder_init_dig(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_encoder *ext_encoder = amdgpu_get_external_encoder(encoder);
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ amdgpu_atombios_encoder_setup_dig_transmitter(encoder, ATOM_TRANSMITTER_ACTION_INIT,
+ 0, 0);
+ break;
+ }
+
+ if (ext_encoder)
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder,
+ EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT);
+ }
+}
+
+static bool
+amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_TV_SUPPORT |
+ ATOM_DEVICE_CV_SUPPORT |
+ ATOM_DEVICE_CRT_SUPPORT)) {
+ DAC_LOAD_DETECTION_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, DAC_LoadDetection);
+ uint8_t frev, crev;
+
+ memset(&args, 0, sizeof(args));
+
+ if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, &crev))
+ return false;
+
+ args.sDacload.ucMisc = 0;
+
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_DAC1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1))
+ args.sDacload.ucDacType = ATOM_DAC_A;
+ else
+ args.sDacload.ucDacType = ATOM_DAC_B;
+
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT1_SUPPORT)
+ args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CRT1_SUPPORT);
+ else if (amdgpu_connector->devices & ATOM_DEVICE_CRT2_SUPPORT)
+ args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CRT2_SUPPORT);
+ else if (amdgpu_connector->devices & ATOM_DEVICE_CV_SUPPORT) {
+ args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_CV_SUPPORT);
+ if (crev >= 3)
+ args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
+ } else if (amdgpu_connector->devices & ATOM_DEVICE_TV1_SUPPORT) {
+ args.sDacload.usDeviceID = cpu_to_le16(ATOM_DEVICE_TV1_SUPPORT);
+ if (crev >= 3)
+ args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb;
+ }
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+
+ return true;
+ } else
+ return false;
+}
+
+enum drm_connector_status
+amdgpu_atombios_encoder_dac_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ uint32_t bios_0_scratch;
+
+ if (!amdgpu_atombios_encoder_dac_load_detect(encoder, connector)) {
+ DRM_DEBUG_KMS("detect returned false \n");
+ return connector_status_unknown;
+ }
+
+ bios_0_scratch = RREG32(mmBIOS_SCRATCH_0);
+
+ DRM_DEBUG_KMS("Bios 0 scratch %x %08x\n", bios_0_scratch, amdgpu_encoder->devices);
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT1_SUPPORT) {
+ if (bios_0_scratch & ATOM_S0_CRT1_MASK)
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT2_SUPPORT) {
+ if (bios_0_scratch & ATOM_S0_CRT2_MASK)
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_CV_SUPPORT) {
+ if (bios_0_scratch & (ATOM_S0_CV_MASK|ATOM_S0_CV_MASK_A))
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_TV1_SUPPORT) {
+ if (bios_0_scratch & (ATOM_S0_TV1_COMPOSITE | ATOM_S0_TV1_COMPOSITE_A))
+ return connector_status_connected; /* CTV */
+ else if (bios_0_scratch & (ATOM_S0_TV1_SVIDEO | ATOM_S0_TV1_SVIDEO_A))
+ return connector_status_connected; /* STV */
+ }
+ return connector_status_disconnected;
+}
+
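Both DAC and DP-bridge load detection end the same way: the VBIOS reports what it sensed through bits in BIOS_SCRATCH_0, and the driver intersects those bits with the devices the connector claims to support. A small sketch of that decode step; the bit masks are placeholders standing in for the ATOM_S0_* definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder masks; the driver uses ATOM_S0_CRT1_MASK, ATOM_S0_TV1_*, etc. */
#define DEV_CRT1 0x1u
#define DEV_CRT2 0x2u
#define DEV_TV1  0x4u

#define S0_CRT1_SENSED 0x0001u
#define S0_CRT2_SENSED 0x0002u
#define S0_TV1_SENSED  0x0004u

static bool scratch_reports_connected(uint32_t scratch0, uint32_t connector_devices)
{
    if ((connector_devices & DEV_CRT1) && (scratch0 & S0_CRT1_SENSED))
        return true;
    if ((connector_devices & DEV_CRT2) && (scratch0 & S0_CRT2_SENSED))
        return true;
    if ((connector_devices & DEV_TV1) && (scratch0 & S0_TV1_SENSED))
        return true;
    return false;
}

int main(void)
{
    printf("%s\n", scratch_reports_connected(S0_CRT1_SENSED, DEV_CRT1)
           ? "connected" : "disconnected");
    return 0;
}
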
+enum drm_connector_status
+amdgpu_atombios_encoder_dig_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ struct drm_encoder *ext_encoder = amdgpu_get_external_encoder(encoder);
+ u32 bios_0_scratch;
+
+ if (!ext_encoder)
+ return connector_status_unknown;
+
+ if ((amdgpu_connector->devices & ATOM_DEVICE_CRT_SUPPORT) == 0)
+ return connector_status_unknown;
+
+ /* load detect on the dp bridge */
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder,
+ EXTERNAL_ENCODER_ACTION_V3_DACLOAD_DETECTION);
+
+ bios_0_scratch = RREG32(mmBIOS_SCRATCH_0);
+
+ DRM_DEBUG_KMS("Bios 0 scratch %x %08x\n", bios_0_scratch, amdgpu_encoder->devices);
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT1_SUPPORT) {
+ if (bios_0_scratch & ATOM_S0_CRT1_MASK)
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_CRT2_SUPPORT) {
+ if (bios_0_scratch & ATOM_S0_CRT2_MASK)
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_CV_SUPPORT) {
+ if (bios_0_scratch & (ATOM_S0_CV_MASK|ATOM_S0_CV_MASK_A))
+ return connector_status_connected;
+ }
+ if (amdgpu_connector->devices & ATOM_DEVICE_TV1_SUPPORT) {
+ if (bios_0_scratch & (ATOM_S0_TV1_COMPOSITE | ATOM_S0_TV1_COMPOSITE_A))
+ return connector_status_connected; /* CTV */
+ else if (bios_0_scratch & (ATOM_S0_TV1_SVIDEO | ATOM_S0_TV1_SVIDEO_A))
+ return connector_status_connected; /* STV */
+ }
+ return connector_status_disconnected;
+}
+
+void
+amdgpu_atombios_encoder_setup_ext_encoder_ddc(struct drm_encoder *encoder)
+{
+ struct drm_encoder *ext_encoder = amdgpu_get_external_encoder(encoder);
+
+ if (ext_encoder)
+ /* ddc_setup on the dp bridge */
+ amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder,
+ EXTERNAL_ENCODER_ACTION_V3_DDC_SETUP);
+
+}
+
+void
+amdgpu_atombios_encoder_set_bios_scratch_regs(struct drm_connector *connector,
+ struct drm_encoder *encoder,
+ bool connected)
+{
+ struct drm_device *dev = connector->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_connector *amdgpu_connector =
+ to_amdgpu_connector(connector);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ uint32_t bios_0_scratch, bios_3_scratch, bios_6_scratch;
+
+ bios_0_scratch = RREG32(mmBIOS_SCRATCH_0);
+ bios_3_scratch = RREG32(mmBIOS_SCRATCH_3);
+ bios_6_scratch = RREG32(mmBIOS_SCRATCH_6);
+
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_LCD1_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_LCD1_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("LCD1 connected\n");
+ bios_0_scratch |= ATOM_S0_LCD1;
+ bios_3_scratch |= ATOM_S3_LCD1_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_LCD1;
+ } else {
+ DRM_DEBUG_KMS("LCD1 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_LCD1;
+ bios_3_scratch &= ~ATOM_S3_LCD1_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_LCD1;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_CRT1_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_CRT1_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("CRT1 connected\n");
+ bios_0_scratch |= ATOM_S0_CRT1_COLOR;
+ bios_3_scratch |= ATOM_S3_CRT1_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_CRT1;
+ } else {
+ DRM_DEBUG_KMS("CRT1 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_CRT1_MASK;
+ bios_3_scratch &= ~ATOM_S3_CRT1_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_CRT1;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_CRT2_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_CRT2_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("CRT2 connected\n");
+ bios_0_scratch |= ATOM_S0_CRT2_COLOR;
+ bios_3_scratch |= ATOM_S3_CRT2_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_CRT2;
+ } else {
+ DRM_DEBUG_KMS("CRT2 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_CRT2_MASK;
+ bios_3_scratch &= ~ATOM_S3_CRT2_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_CRT2;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP1_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP1_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP1 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP1;
+ bios_3_scratch |= ATOM_S3_DFP1_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP1;
+ } else {
+ DRM_DEBUG_KMS("DFP1 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP1;
+ bios_3_scratch &= ~ATOM_S3_DFP1_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP1;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP2_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP2 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP2;
+ bios_3_scratch |= ATOM_S3_DFP2_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP2;
+ } else {
+ DRM_DEBUG_KMS("DFP2 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP2;
+ bios_3_scratch &= ~ATOM_S3_DFP2_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP2;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP3_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP3_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP3 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP3;
+ bios_3_scratch |= ATOM_S3_DFP3_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP3;
+ } else {
+ DRM_DEBUG_KMS("DFP3 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP3;
+ bios_3_scratch &= ~ATOM_S3_DFP3_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP3;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP4_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP4_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP4 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP4;
+ bios_3_scratch |= ATOM_S3_DFP4_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP4;
+ } else {
+ DRM_DEBUG_KMS("DFP4 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP4;
+ bios_3_scratch &= ~ATOM_S3_DFP4_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP4;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP5_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP5_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP5 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP5;
+ bios_3_scratch |= ATOM_S3_DFP5_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP5;
+ } else {
+ DRM_DEBUG_KMS("DFP5 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP5;
+ bios_3_scratch &= ~ATOM_S3_DFP5_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP5;
+ }
+ }
+ if ((amdgpu_encoder->devices & ATOM_DEVICE_DFP6_SUPPORT) &&
+ (amdgpu_connector->devices & ATOM_DEVICE_DFP6_SUPPORT)) {
+ if (connected) {
+ DRM_DEBUG_KMS("DFP6 connected\n");
+ bios_0_scratch |= ATOM_S0_DFP6;
+ bios_3_scratch |= ATOM_S3_DFP6_ACTIVE;
+ bios_6_scratch |= ATOM_S6_ACC_REQ_DFP6;
+ } else {
+ DRM_DEBUG_KMS("DFP6 disconnected\n");
+ bios_0_scratch &= ~ATOM_S0_DFP6;
+ bios_3_scratch &= ~ATOM_S3_DFP6_ACTIVE;
+ bios_6_scratch &= ~ATOM_S6_ACC_REQ_DFP6;
+ }
+ }
+
+ WREG32(mmBIOS_SCRATCH_0, bios_0_scratch);
+ WREG32(mmBIOS_SCRATCH_3, bios_3_scratch);
+ WREG32(mmBIOS_SCRATCH_6, bios_6_scratch);
+}
+
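+/* The LVDS_Info data table exists in more than one revision; the union covers both layouts. */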
+union lvds_info {
+ struct _ATOM_LVDS_INFO info;
+ struct _ATOM_LVDS_INFO_V12 info_12;
+};
+
+struct amdgpu_encoder_atom_dig *
+amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder)
+{
+ struct drm_device *dev = encoder->base.dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, LVDS_Info);
+ uint16_t data_offset, misc;
+ union lvds_info *lvds_info;
+ uint8_t frev, crev;
+ struct amdgpu_encoder_atom_dig *lvds = NULL;
+ int encoder_enum = (encoder->encoder_enum & ENUM_ID_MASK) >> ENUM_ID_SHIFT;
+
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ lvds_info =
+ (union lvds_info *)(mode_info->atom_context->bios + data_offset);
+ lvds =
+ kzalloc(sizeof(struct amdgpu_encoder_atom_dig), GFP_KERNEL);
+
+ if (!lvds)
+ return NULL;
+
+ lvds->native_mode.clock =
+ le16_to_cpu(lvds_info->info.sLCDTiming.usPixClk) * 10;
+ lvds->native_mode.hdisplay =
+ le16_to_cpu(lvds_info->info.sLCDTiming.usHActive);
+ lvds->native_mode.vdisplay =
+ le16_to_cpu(lvds_info->info.sLCDTiming.usVActive);
+ lvds->native_mode.htotal = lvds->native_mode.hdisplay +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usHBlanking_Time);
+ lvds->native_mode.hsync_start = lvds->native_mode.hdisplay +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usHSyncOffset);
+ lvds->native_mode.hsync_end = lvds->native_mode.hsync_start +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usHSyncWidth);
+ lvds->native_mode.vtotal = lvds->native_mode.vdisplay +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usVBlanking_Time);
+ lvds->native_mode.vsync_start = lvds->native_mode.vdisplay +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usVSyncOffset);
+ lvds->native_mode.vsync_end = lvds->native_mode.vsync_start +
+ le16_to_cpu(lvds_info->info.sLCDTiming.usVSyncWidth);
+ lvds->panel_pwr_delay =
+ le16_to_cpu(lvds_info->info.usOffDelayInMs);
+ lvds->lcd_misc = lvds_info->info.ucLVDS_Misc;
+
+ misc = le16_to_cpu(lvds_info->info.sLCDTiming.susModeMiscInfo.usAccess);
+ if (misc & ATOM_VSYNC_POLARITY)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_NVSYNC;
+ if (misc & ATOM_HSYNC_POLARITY)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_NHSYNC;
+ if (misc & ATOM_COMPOSITESYNC)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_CSYNC;
+ if (misc & ATOM_INTERLACE)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_INTERLACE;
+ if (misc & ATOM_DOUBLE_CLOCK_MODE)
+ lvds->native_mode.flags |= DRM_MODE_FLAG_DBLSCAN;
+
+ lvds->native_mode.width_mm = le16_to_cpu(lvds_info->info.sLCDTiming.usImageHSize);
+ lvds->native_mode.height_mm = le16_to_cpu(lvds_info->info.sLCDTiming.usImageVSize);
+
+ /* set crtc values */
+ drm_mode_set_crtcinfo(&lvds->native_mode, CRTC_INTERLACE_HALVE_V);
+
+ lvds->lcd_ss_id = lvds_info->info.ucSS_Id;
+
+ encoder->native_mode = lvds->native_mode;
+
+ if (encoder_enum == 2)
+ lvds->linkb = true;
+ else
+ lvds->linkb = false;
+
+ /* parse the lcd record table */
+ if (le16_to_cpu(lvds_info->info.usModePatchTableOffset)) {
+ ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record;
+ ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record;
+ bool bad_record = false;
+ u8 *record;
+
+ if ((frev == 1) && (crev < 2))
+ /* absolute */
+ record = (u8 *)(mode_info->atom_context->bios +
+ le16_to_cpu(lvds_info->info.usModePatchTableOffset));
+ else
+ /* relative */
+ record = (u8 *)(mode_info->atom_context->bios +
+ data_offset +
+ le16_to_cpu(lvds_info->info.usModePatchTableOffset));
+ while (*record != ATOM_RECORD_END_TYPE) {
+ switch (*record) {
+ case LCD_MODE_PATCH_RECORD_MODE_TYPE:
+ record += sizeof(ATOM_PATCH_RECORD_MODE);
+ break;
+ case LCD_RTS_RECORD_TYPE:
+ record += sizeof(ATOM_LCD_RTS_RECORD);
+ break;
+ case LCD_CAP_RECORD_TYPE:
+ record += sizeof(ATOM_LCD_MODE_CONTROL_CAP);
+ break;
+ case LCD_FAKE_EDID_PATCH_RECORD_TYPE:
+ fake_edid_record = (ATOM_FAKE_EDID_PATCH_RECORD *)record;
+ if (fake_edid_record->ucFakeEDIDLength) {
+ const struct drm_edid *edid;
+ int edid_size;
+
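+ /* ucFakeEDIDLength is either a raw byte count (128) or a number of 128-byte EDID blocks */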
+ if (fake_edid_record->ucFakeEDIDLength == 128)
+ edid_size = fake_edid_record->ucFakeEDIDLength;
+ else
+ edid_size = fake_edid_record->ucFakeEDIDLength * 128;
+ edid = drm_edid_alloc(fake_edid_record->ucFakeEDIDString, edid_size);
+ if (drm_edid_valid(edid))
+ adev->mode_info.bios_hardcoded_edid = edid;
+ else
+ drm_edid_free(edid);
+ record += struct_size(fake_edid_record,
+ ucFakeEDIDString,
+ edid_size);
+ } else {
+ /* an empty fake EDID record must be 3 bytes long */
+ record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
+ }
+ break;
+ case LCD_PANEL_RESOLUTION_RECORD_TYPE:
+ panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
+ lvds->native_mode.width_mm = panel_res_record->usHSize;
+ lvds->native_mode.height_mm = panel_res_record->usVSize;
+ record += sizeof(ATOM_PANEL_RESOLUTION_PATCH_RECORD);
+ break;
+ default:
+ DRM_ERROR("Bad LCD record %d\n", *record);
+ bad_record = true;
+ break;
+ }
+ if (bad_record)
+ break;
+ }
+ }
+ }
+ return lvds;
+}
+
+struct amdgpu_encoder_atom_dig *
+amdgpu_atombios_encoder_get_dig_info(struct amdgpu_encoder *amdgpu_encoder)
+{
+ int encoder_enum = (amdgpu_encoder->encoder_enum & ENUM_ID_MASK) >> ENUM_ID_SHIFT;
+ struct amdgpu_encoder_atom_dig *dig = kzalloc(sizeof(struct amdgpu_encoder_atom_dig), GFP_KERNEL);
+
+ if (!dig)
+ return NULL;
+
+ /* coherent mode by default */
+ dig->coherent_mode = true;
+ dig->dig_encoder = -1;
+
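+ /* the second encoder enum instance uses link B */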
+ if (encoder_enum == 2)
+ dig->linkb = true;
+ else
+ dig->linkb = false;
+
+ return dig;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
new file mode 100644
index 000000000000..f77cbdef679e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ATOMBIOS_ENCODER_H__
+#define __ATOMBIOS_ENCODER_H__
+
+u8
+amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev);
+void
+amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
+ u8 backlight_level);
+u8
+amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder);
+void
+amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder,
+ u8 level);
+void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encoder,
+ struct drm_connector *drm_connector);
+void
+amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder);
+bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder);
+bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);
+int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder);
+void
+amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder,
+ int action, int panel_mode);
+void
+amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int action,
+ uint8_t lane_num, uint8_t lane_set);
+bool
+amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector,
+ int action);
+void
+amdgpu_atombios_encoder_dpms(struct drm_encoder *encoder, int mode);
+void
+amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder);
+void
+amdgpu_atombios_encoder_init_dig(struct amdgpu_device *adev);
+enum drm_connector_status
+amdgpu_atombios_encoder_dac_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector);
+enum drm_connector_status
+amdgpu_atombios_encoder_dig_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector);
+void
+amdgpu_atombios_encoder_setup_ext_encoder_ddc(struct drm_encoder *encoder);
+void
+amdgpu_atombios_encoder_set_bios_scratch_regs(struct drm_connector *connector,
+ struct drm_encoder *encoder,
+ bool connected);
+struct amdgpu_encoder_atom_dig *
+amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder);
+struct amdgpu_encoder_atom_dig *
+amdgpu_atombios_encoder_get_dig_info(struct amdgpu_encoder *amdgpu_encoder);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
new file mode 100644
index 000000000000..a6501114322f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ *
+ */
+
+#include <drm/amdgpu_drm.h>
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_i2c.h"
+
+#define TARGET_HW_I2C_CLOCK 50
+
+/* these are limitations of ProcessI2cChannelTransaction, not the hw */
+#define ATOM_MAX_HW_I2C_WRITE 3
+#define ATOM_MAX_HW_I2C_READ 255
+
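+/*
+ * Issue one ProcessI2cChannelTransaction call. For writes, buf[0] is used
+ * as the register index and at most two further bytes are passed via
+ * lpI2CDataOut; for reads, the result is copied back out of the ATOM
+ * scratch area into buf.
+ */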
+static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan,
+ u8 slave_addr, u8 flags,
+ u8 *buf, u8 num)
+{
+ struct drm_device *dev = chan->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction);
+ unsigned char *base;
+ u16 out = cpu_to_le16(0);
+ int r = 0;
+
+ memset(&args, 0, sizeof(args));
+
+ mutex_lock(&chan->mutex);
+
+ base = (unsigned char *)adev->mode_info.atom_context->scratch;
+
+ if (flags & HW_I2C_WRITE) {
+ if (num > ATOM_MAX_HW_I2C_WRITE) {
+ DRM_ERROR("hw i2c: tried to write too many bytes (%d vs 3)\n", num);
+ r = -EINVAL;
+ goto done;
+ }
+ if (buf == NULL)
+ args.ucRegIndex = 0;
+ else
+ args.ucRegIndex = buf[0];
+ if (num)
+ num--;
+ if (num) {
+ if (buf) {
+ memcpy(&out, &buf[1], num);
+ } else {
+ DRM_ERROR("hw i2c: missing buf with num > 1\n");
+ r = -EINVAL;
+ goto done;
+ }
+ }
+ args.lpI2CDataOut = cpu_to_le16(out);
+ } else {
+ args.ucRegIndex = 0;
+ args.lpI2CDataOut = 0;
+ }
+
+ args.ucFlag = flags;
+ args.ucI2CSpeed = TARGET_HW_I2C_CLOCK;
+ args.ucTransBytes = num;
+ args.ucSlaveAddr = slave_addr << 1;
+ args.ucLineNumber = chan->rec.i2c_id;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+
+ /* error */
+ if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) {
+ DRM_DEBUG_KMS("hw_i2c error\n");
+ r = -EIO;
+ goto done;
+ }
+
+ if (!(flags & HW_I2C_WRITE))
+ amdgpu_atombios_copy_swap(buf, base, num, false);
+
+done:
+ mutex_unlock(&chan->mutex);
+
+ return r;
+}
+
+int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap,
+ struct i2c_msg *msgs, int num)
+{
+ struct amdgpu_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+ struct i2c_msg *p;
+ int i, remaining, current_count, buffer_offset, max_bytes, ret;
+ u8 flags;
+
+ /* check for bus probe */
+ p = &msgs[0];
+ if ((num == 1) && (p->len == 0)) {
+ ret = amdgpu_atombios_i2c_process_i2c_ch(i2c,
+ p->addr, HW_I2C_WRITE,
+ NULL, 0);
+ if (ret)
+ return ret;
+ else
+ return num;
+ }
+
+ for (i = 0; i < num; i++) {
+ p = &msgs[i];
+ remaining = p->len;
+ buffer_offset = 0;
+ /* max_bytes is a limitation of ProcessI2cChannelTransaction, not the hw */
+ if (p->flags & I2C_M_RD) {
+ max_bytes = ATOM_MAX_HW_I2C_READ;
+ flags = HW_I2C_READ;
+ } else {
+ max_bytes = ATOM_MAX_HW_I2C_WRITE;
+ flags = HW_I2C_WRITE;
+ }
+ while (remaining) {
+ if (remaining > max_bytes)
+ current_count = max_bytes;
+ else
+ current_count = remaining;
+ ret = amdgpu_atombios_i2c_process_i2c_ch(i2c,
+ p->addr, flags,
+ &p->buf[buffer_offset], current_count);
+ if (ret)
+ return ret;
+ remaining -= current_count;
+ buffer_offset += current_count;
+ }
+ }
+
+ return num;
+}
+
+u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap)
+{
+ return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
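+/*
+ * One-shot single byte write on the given i2c line; unlike the xfer path
+ * above, this takes no lock and does not check the status returned by the
+ * table.
+ */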
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr, u8 line_number, u8 offset, u8 data)
+{
+ PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args;
+ int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction);
+
+ args.ucRegIndex = offset;
+ args.lpI2CDataOut = data;
+ args.ucFlag = 1;
+ args.ucI2CSpeed = TARGET_HW_I2C_CLOCK;
+ args.ucTransBytes = 1;
+ args.ucSlaveAddr = slave_addr;
+ args.ucLineNumber = line_number;
+
+ amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
new file mode 100644
index 000000000000..251aaf41f65d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __ATOMBIOS_I2C_H__
+#define __ATOMBIOS_I2C_H__
+
+int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap,
+ struct i2c_msg *msgs, int num);
+u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap);
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev,
+ u8 slave_addr, u8 line_number, u8 offset, u8 data);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
new file mode 100644
index 000000000000..9cd63b4177bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -0,0 +1,2279 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <drm/amdgpu_drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "cikd.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "cik.h"
+#include "gmc_v7_0.h"
+#include "cik_ih.h"
+#include "dce_v8_0.h"
+#include "gfx_v7_0.h"
+#include "cik_sdma.h"
+#include "uvd_v4_2.h"
+#include "vce_v2_0.h"
+#include "cik_dpm.h"
+
+#include "uvd/uvd_4_2_d.h"
+
+#include "smu/smu_7_0_1_d.h"
+#include "smu/smu_7_0_1_sh_mask.h"
+
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+#include "amdgpu_dm.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_vkms.h"
+
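+/* Video encode/decode capabilities reported for all CIK parts via cik_query_video_codecs() */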
+static const struct amdgpu_video_codec_info cik_video_codecs_encode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 0,
+ },
+};
+
+static const struct amdgpu_video_codecs cik_video_codecs_encode =
+{
+ .codec_count = ARRAY_SIZE(cik_video_codecs_encode_array),
+ .codec_array = cik_video_codecs_encode_array,
+};
+
+static const struct amdgpu_video_codec_info cik_video_codecs_decode_array[] =
+{
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 3,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 5,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 41,
+ },
+ {
+ .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
+ .max_width = 2048,
+ .max_height = 1152,
+ .max_pixels_per_frame = 2048 * 1152,
+ .max_level = 4,
+ },
+};
+
+static const struct amdgpu_video_codecs cik_video_codecs_decode =
+{
+ .codec_count = ARRAY_SIZE(cik_video_codecs_decode_array),
+ .codec_array = cik_video_codecs_decode_array,
+};
+
+static int cik_query_video_codecs(struct amdgpu_device *adev, bool encode,
+ const struct amdgpu_video_codecs **codecs)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ if (encode)
+ *codecs = &cik_video_codecs_encode;
+ else
+ *codecs = &cik_video_codecs_decode;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Indirect register accessors
+ */
+static u32 cik_pcie_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(mmPCIE_INDEX, reg);
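+ /* read back so the index write lands before touching the data port */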
+ (void)RREG32(mmPCIE_INDEX);
+ r = RREG32(mmPCIE_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static void cik_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(mmPCIE_INDEX, reg);
+ (void)RREG32(mmPCIE_INDEX);
+ WREG32(mmPCIE_DATA, v);
+ (void)RREG32(mmPCIE_DATA);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+static u32 cik_smc_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ r = RREG32(mmSMC_IND_DATA_0);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+ return r;
+}
+
+static void cik_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ WREG32(mmSMC_IND_DATA_0, (v));
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+}
+
+static u32 cik_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ r = RREG32(mmUVD_CTX_DATA);
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+ return r;
+}
+
+static void cik_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags);
+ WREG32(mmUVD_CTX_INDEX, ((reg) & 0x1ff));
+ WREG32(mmUVD_CTX_DATA, (v));
+ spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags);
+}
+
+static u32 cik_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(mmDIDT_IND_INDEX, (reg));
+ r = RREG32(mmDIDT_IND_DATA);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void cik_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(mmDIDT_IND_INDEX, (reg));
+ WREG32(mmDIDT_IND_DATA, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
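+/*
+ * Golden register tables: { register offset, mask, value } triplets
+ * consumed by amdgpu_device_program_register_sequence() in
+ * cik_init_golden_registers() below.
+ */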
+static const u32 bonaire_golden_spm_registers[] =
+{
+ 0xc200, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 bonaire_golden_common_registers[] =
+{
+ 0x31dc, 0xffffffff, 0x00000800,
+ 0x31dd, 0xffffffff, 0x00000800,
+ 0x31e6, 0xffffffff, 0x00007fbf,
+ 0x31e7, 0xffffffff, 0x00007faf
+};
+
+static const u32 bonaire_golden_registers[] =
+{
+ 0xcd5, 0x00000333, 0x00000333,
+ 0xcd4, 0x000c0fc0, 0x00040200,
+ 0x2684, 0x00010000, 0x00058208,
+ 0xf000, 0xffff1fff, 0x00140000,
+ 0xf080, 0xfdfc0fff, 0x00000100,
+ 0xf08d, 0x40000000, 0x40000200,
+ 0x260c, 0xffffffff, 0x00000000,
+ 0x260d, 0xf00fffff, 0x00000400,
+ 0x260e, 0x0002021c, 0x00020200,
+ 0x31e, 0x00000080, 0x00000000,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0x263e, 0x73773777, 0x12010001,
+ 0xd43, 0x00810000, 0x408af000,
+ 0x1c0c, 0x31000111, 0x00000011,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x883, 0x00007fb6, 0x0021a1b1,
+ 0x884, 0x00007fb6, 0x002021b1,
+ 0x860, 0x00007fb6, 0x00002191,
+ 0x886, 0x00007fb6, 0x002121b1,
+ 0x887, 0x00007fb6, 0x002021b1,
+ 0x877, 0x00007fb6, 0x00002191,
+ 0x878, 0x00007fb6, 0x00002191,
+ 0xd8a, 0x0000003f, 0x0000000a,
+ 0xd8b, 0x0000003f, 0x0000000a,
+ 0xab9, 0x00073ffe, 0x000022a2,
+ 0x903, 0x000007ff, 0x00000000,
+ 0x2285, 0xf000003f, 0x00000007,
+ 0x22fc, 0x00002001, 0x00000001,
+ 0x22c9, 0xffffffff, 0x00ffffff,
+ 0xc281, 0x0000ff0f, 0x00000000,
+ 0xa293, 0x07ffffff, 0x06000000,
+ 0x136, 0x00000fff, 0x00000100,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x2440, 0x03000000, 0x0362c688,
+ 0x2300, 0x000000ff, 0x00000001,
+ 0x390, 0x00001fff, 0x00001fff,
+ 0x2418, 0x0000007f, 0x00000020,
+ 0x2542, 0x00010000, 0x00010000,
+ 0x2b05, 0x000003ff, 0x000000f3,
+ 0x2b03, 0xffffffff, 0x00001032
+};
+
+static const u32 bonaire_mgcg_cgcg_init[] =
+{
+ 0x3108, 0xffffffff, 0xfffffffc,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf0a8, 0xffffffff, 0x00000100,
+ 0xf082, 0xffffffff, 0x00000100,
+ 0xf0b0, 0xffffffff, 0xc0000100,
+ 0xf0b2, 0xffffffff, 0xc0000100,
+ 0xf0b1, 0xffffffff, 0xc0000100,
+ 0x1579, 0xffffffff, 0x00600100,
+ 0xf0a0, 0xffffffff, 0x00000100,
+ 0xf085, 0xffffffff, 0x06000100,
+ 0xf088, 0xffffffff, 0x00000100,
+ 0xf086, 0xffffffff, 0x06000100,
+ 0xf081, 0xffffffff, 0x00000100,
+ 0xf0b8, 0xffffffff, 0x00000100,
+ 0xf089, 0xffffffff, 0x00000100,
+ 0xf080, 0xffffffff, 0x00000100,
+ 0xf08c, 0xffffffff, 0x00000100,
+ 0xf08d, 0xffffffff, 0x00000100,
+ 0xf094, 0xffffffff, 0x00000100,
+ 0xf095, 0xffffffff, 0x00000100,
+ 0xf096, 0xffffffff, 0x00000100,
+ 0xf097, 0xffffffff, 0x00000100,
+ 0xf098, 0xffffffff, 0x00000100,
+ 0xf09f, 0xffffffff, 0x00000100,
+ 0xf09e, 0xffffffff, 0x00000100,
+ 0xf084, 0xffffffff, 0x06000100,
+ 0xf0a4, 0xffffffff, 0x00000100,
+ 0xf09d, 0xffffffff, 0x00000100,
+ 0xf0ad, 0xffffffff, 0x00000100,
+ 0xf0ac, 0xffffffff, 0x00000100,
+ 0xf09c, 0xffffffff, 0x00000100,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf008, 0xffffffff, 0x00010000,
+ 0xf009, 0xffffffff, 0x00030002,
+ 0xf00a, 0xffffffff, 0x00040007,
+ 0xf00b, 0xffffffff, 0x00060005,
+ 0xf00c, 0xffffffff, 0x00090008,
+ 0xf00d, 0xffffffff, 0x00010000,
+ 0xf00e, 0xffffffff, 0x00030002,
+ 0xf00f, 0xffffffff, 0x00040007,
+ 0xf010, 0xffffffff, 0x00060005,
+ 0xf011, 0xffffffff, 0x00090008,
+ 0xf012, 0xffffffff, 0x00010000,
+ 0xf013, 0xffffffff, 0x00030002,
+ 0xf014, 0xffffffff, 0x00040007,
+ 0xf015, 0xffffffff, 0x00060005,
+ 0xf016, 0xffffffff, 0x00090008,
+ 0xf017, 0xffffffff, 0x00010000,
+ 0xf018, 0xffffffff, 0x00030002,
+ 0xf019, 0xffffffff, 0x00040007,
+ 0xf01a, 0xffffffff, 0x00060005,
+ 0xf01b, 0xffffffff, 0x00090008,
+ 0xf01c, 0xffffffff, 0x00010000,
+ 0xf01d, 0xffffffff, 0x00030002,
+ 0xf01e, 0xffffffff, 0x00040007,
+ 0xf01f, 0xffffffff, 0x00060005,
+ 0xf020, 0xffffffff, 0x00090008,
+ 0xf021, 0xffffffff, 0x00010000,
+ 0xf022, 0xffffffff, 0x00030002,
+ 0xf023, 0xffffffff, 0x00040007,
+ 0xf024, 0xffffffff, 0x00060005,
+ 0xf025, 0xffffffff, 0x00090008,
+ 0xf026, 0xffffffff, 0x00010000,
+ 0xf027, 0xffffffff, 0x00030002,
+ 0xf028, 0xffffffff, 0x00040007,
+ 0xf029, 0xffffffff, 0x00060005,
+ 0xf02a, 0xffffffff, 0x00090008,
+ 0xf000, 0xffffffff, 0x96e00200,
+ 0x21c2, 0xffffffff, 0x00900100,
+ 0x3109, 0xffffffff, 0x0020003f,
+ 0xe, 0xffffffff, 0x0140001c,
+ 0xf, 0x000f0000, 0x000f0000,
+ 0x88, 0xffffffff, 0xc060000c,
+ 0x89, 0xc0000fff, 0x00000100,
+ 0x3e4, 0xffffffff, 0x00000100,
+ 0x3e6, 0x00000101, 0x00000000,
+ 0x82a, 0xffffffff, 0x00000104,
+ 0x1579, 0xff000fff, 0x00000100,
+ 0xc33, 0xc0000fff, 0x00000104,
+ 0x3079, 0x00000001, 0x00000001,
+ 0x3403, 0xff000ff0, 0x00000100,
+ 0x3603, 0xff000ff0, 0x00000100
+};
+
+static const u32 spectre_golden_spm_registers[] =
+{
+ 0xc200, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 spectre_golden_common_registers[] =
+{
+ 0x31dc, 0xffffffff, 0x00000800,
+ 0x31dd, 0xffffffff, 0x00000800,
+ 0x31e6, 0xffffffff, 0x00007fbf,
+ 0x31e7, 0xffffffff, 0x00007faf
+};
+
+static const u32 spectre_golden_registers[] =
+{
+ 0xf000, 0xffff1fff, 0x96940200,
+ 0xf003, 0xffff0001, 0xff000000,
+ 0xf080, 0xfffc0fff, 0x00000100,
+ 0x1bb6, 0x00010101, 0x00010000,
+ 0x260d, 0xf00fffff, 0x00000400,
+ 0x260e, 0xfffffffc, 0x00020200,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0x263e, 0x73773777, 0x12010001,
+ 0x26df, 0x00ff0000, 0x00fc0000,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x2285, 0xf000003f, 0x00000007,
+ 0x22c9, 0xffffffff, 0x00ffffff,
+ 0xa0d4, 0x3f3f3fff, 0x00000082,
+ 0xa0d5, 0x0000003f, 0x00000000,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x244f, 0xffff03df, 0x00000004,
+ 0x31da, 0x00000008, 0x00000008,
+ 0x2300, 0x000008ff, 0x00000800,
+ 0x2542, 0x00010000, 0x00010000,
+ 0x2b03, 0xffffffff, 0x54763210,
+ 0x853e, 0x01ff01ff, 0x00000002,
+ 0x8526, 0x007ff800, 0x00200000,
+ 0x8057, 0xffffffff, 0x00000f40,
+ 0xc24d, 0xffffffff, 0x00000001
+};
+
+static const u32 spectre_mgcg_cgcg_init[] =
+{
+ 0x3108, 0xffffffff, 0xfffffffc,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf0a8, 0xffffffff, 0x00000100,
+ 0xf082, 0xffffffff, 0x00000100,
+ 0xf0b0, 0xffffffff, 0x00000100,
+ 0xf0b2, 0xffffffff, 0x00000100,
+ 0xf0b1, 0xffffffff, 0x00000100,
+ 0x1579, 0xffffffff, 0x00600100,
+ 0xf0a0, 0xffffffff, 0x00000100,
+ 0xf085, 0xffffffff, 0x06000100,
+ 0xf088, 0xffffffff, 0x00000100,
+ 0xf086, 0xffffffff, 0x06000100,
+ 0xf081, 0xffffffff, 0x00000100,
+ 0xf0b8, 0xffffffff, 0x00000100,
+ 0xf089, 0xffffffff, 0x00000100,
+ 0xf080, 0xffffffff, 0x00000100,
+ 0xf08c, 0xffffffff, 0x00000100,
+ 0xf08d, 0xffffffff, 0x00000100,
+ 0xf094, 0xffffffff, 0x00000100,
+ 0xf095, 0xffffffff, 0x00000100,
+ 0xf096, 0xffffffff, 0x00000100,
+ 0xf097, 0xffffffff, 0x00000100,
+ 0xf098, 0xffffffff, 0x00000100,
+ 0xf09f, 0xffffffff, 0x00000100,
+ 0xf09e, 0xffffffff, 0x00000100,
+ 0xf084, 0xffffffff, 0x06000100,
+ 0xf0a4, 0xffffffff, 0x00000100,
+ 0xf09d, 0xffffffff, 0x00000100,
+ 0xf0ad, 0xffffffff, 0x00000100,
+ 0xf0ac, 0xffffffff, 0x00000100,
+ 0xf09c, 0xffffffff, 0x00000100,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf008, 0xffffffff, 0x00010000,
+ 0xf009, 0xffffffff, 0x00030002,
+ 0xf00a, 0xffffffff, 0x00040007,
+ 0xf00b, 0xffffffff, 0x00060005,
+ 0xf00c, 0xffffffff, 0x00090008,
+ 0xf00d, 0xffffffff, 0x00010000,
+ 0xf00e, 0xffffffff, 0x00030002,
+ 0xf00f, 0xffffffff, 0x00040007,
+ 0xf010, 0xffffffff, 0x00060005,
+ 0xf011, 0xffffffff, 0x00090008,
+ 0xf012, 0xffffffff, 0x00010000,
+ 0xf013, 0xffffffff, 0x00030002,
+ 0xf014, 0xffffffff, 0x00040007,
+ 0xf015, 0xffffffff, 0x00060005,
+ 0xf016, 0xffffffff, 0x00090008,
+ 0xf017, 0xffffffff, 0x00010000,
+ 0xf018, 0xffffffff, 0x00030002,
+ 0xf019, 0xffffffff, 0x00040007,
+ 0xf01a, 0xffffffff, 0x00060005,
+ 0xf01b, 0xffffffff, 0x00090008,
+ 0xf01c, 0xffffffff, 0x00010000,
+ 0xf01d, 0xffffffff, 0x00030002,
+ 0xf01e, 0xffffffff, 0x00040007,
+ 0xf01f, 0xffffffff, 0x00060005,
+ 0xf020, 0xffffffff, 0x00090008,
+ 0xf021, 0xffffffff, 0x00010000,
+ 0xf022, 0xffffffff, 0x00030002,
+ 0xf023, 0xffffffff, 0x00040007,
+ 0xf024, 0xffffffff, 0x00060005,
+ 0xf025, 0xffffffff, 0x00090008,
+ 0xf026, 0xffffffff, 0x00010000,
+ 0xf027, 0xffffffff, 0x00030002,
+ 0xf028, 0xffffffff, 0x00040007,
+ 0xf029, 0xffffffff, 0x00060005,
+ 0xf02a, 0xffffffff, 0x00090008,
+ 0xf02b, 0xffffffff, 0x00010000,
+ 0xf02c, 0xffffffff, 0x00030002,
+ 0xf02d, 0xffffffff, 0x00040007,
+ 0xf02e, 0xffffffff, 0x00060005,
+ 0xf02f, 0xffffffff, 0x00090008,
+ 0xf000, 0xffffffff, 0x96e00200,
+ 0x21c2, 0xffffffff, 0x00900100,
+ 0x3109, 0xffffffff, 0x0020003f,
+ 0xe, 0xffffffff, 0x0140001c,
+ 0xf, 0x000f0000, 0x000f0000,
+ 0x88, 0xffffffff, 0xc060000c,
+ 0x89, 0xc0000fff, 0x00000100,
+ 0x3e4, 0xffffffff, 0x00000100,
+ 0x3e6, 0x00000101, 0x00000000,
+ 0x82a, 0xffffffff, 0x00000104,
+ 0x1579, 0xff000fff, 0x00000100,
+ 0xc33, 0xc0000fff, 0x00000104,
+ 0x3079, 0x00000001, 0x00000001,
+ 0x3403, 0xff000ff0, 0x00000100,
+ 0x3603, 0xff000ff0, 0x00000100
+};
+
+static const u32 kalindi_golden_spm_registers[] =
+{
+ 0xc200, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 kalindi_golden_common_registers[] =
+{
+ 0x31dc, 0xffffffff, 0x00000800,
+ 0x31dd, 0xffffffff, 0x00000800,
+ 0x31e6, 0xffffffff, 0x00007fbf,
+ 0x31e7, 0xffffffff, 0x00007faf
+};
+
+static const u32 kalindi_golden_registers[] =
+{
+ 0xf000, 0xffffdfff, 0x6e944040,
+ 0x1579, 0xff607fff, 0xfc000100,
+ 0xf088, 0xff000fff, 0x00000100,
+ 0xf089, 0xff000fff, 0x00000100,
+ 0xf080, 0xfffc0fff, 0x00000100,
+ 0x1bb6, 0x00010101, 0x00010000,
+ 0x260c, 0xffffffff, 0x00000000,
+ 0x260d, 0xf00fffff, 0x00000400,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0x263e, 0x73773777, 0x12010001,
+ 0x263f, 0xffffffff, 0x00000010,
+ 0x26df, 0x00ff0000, 0x00fc0000,
+ 0x200c, 0x00001f0f, 0x0000100a,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x902, 0x000fffff, 0x000c007f,
+ 0x2285, 0xf000003f, 0x00000007,
+ 0x22c9, 0x3fff3fff, 0x00ffcfff,
+ 0xc281, 0x0000ff0f, 0x00000000,
+ 0xa293, 0x07ffffff, 0x06000000,
+ 0x136, 0x00000fff, 0x00000100,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x31da, 0x00000008, 0x00000008,
+ 0x2300, 0x000000ff, 0x00000003,
+ 0x853e, 0x01ff01ff, 0x00000002,
+ 0x8526, 0x007ff800, 0x00200000,
+ 0x8057, 0xffffffff, 0x00000f40,
+ 0x2231, 0x001f3ae3, 0x00000082,
+ 0x2235, 0x0000001f, 0x00000010,
+ 0xc24d, 0xffffffff, 0x00000000
+};
+
+static const u32 kalindi_mgcg_cgcg_init[] =
+{
+ 0x3108, 0xffffffff, 0xfffffffc,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf0a8, 0xffffffff, 0x00000100,
+ 0xf082, 0xffffffff, 0x00000100,
+ 0xf0b0, 0xffffffff, 0x00000100,
+ 0xf0b2, 0xffffffff, 0x00000100,
+ 0xf0b1, 0xffffffff, 0x00000100,
+ 0x1579, 0xffffffff, 0x00600100,
+ 0xf0a0, 0xffffffff, 0x00000100,
+ 0xf085, 0xffffffff, 0x06000100,
+ 0xf088, 0xffffffff, 0x00000100,
+ 0xf086, 0xffffffff, 0x06000100,
+ 0xf081, 0xffffffff, 0x00000100,
+ 0xf0b8, 0xffffffff, 0x00000100,
+ 0xf089, 0xffffffff, 0x00000100,
+ 0xf080, 0xffffffff, 0x00000100,
+ 0xf08c, 0xffffffff, 0x00000100,
+ 0xf08d, 0xffffffff, 0x00000100,
+ 0xf094, 0xffffffff, 0x00000100,
+ 0xf095, 0xffffffff, 0x00000100,
+ 0xf096, 0xffffffff, 0x00000100,
+ 0xf097, 0xffffffff, 0x00000100,
+ 0xf098, 0xffffffff, 0x00000100,
+ 0xf09f, 0xffffffff, 0x00000100,
+ 0xf09e, 0xffffffff, 0x00000100,
+ 0xf084, 0xffffffff, 0x06000100,
+ 0xf0a4, 0xffffffff, 0x00000100,
+ 0xf09d, 0xffffffff, 0x00000100,
+ 0xf0ad, 0xffffffff, 0x00000100,
+ 0xf0ac, 0xffffffff, 0x00000100,
+ 0xf09c, 0xffffffff, 0x00000100,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf008, 0xffffffff, 0x00010000,
+ 0xf009, 0xffffffff, 0x00030002,
+ 0xf00a, 0xffffffff, 0x00040007,
+ 0xf00b, 0xffffffff, 0x00060005,
+ 0xf00c, 0xffffffff, 0x00090008,
+ 0xf00d, 0xffffffff, 0x00010000,
+ 0xf00e, 0xffffffff, 0x00030002,
+ 0xf00f, 0xffffffff, 0x00040007,
+ 0xf010, 0xffffffff, 0x00060005,
+ 0xf011, 0xffffffff, 0x00090008,
+ 0xf000, 0xffffffff, 0x96e00200,
+ 0x21c2, 0xffffffff, 0x00900100,
+ 0x3109, 0xffffffff, 0x0020003f,
+ 0xe, 0xffffffff, 0x0140001c,
+ 0xf, 0x000f0000, 0x000f0000,
+ 0x88, 0xffffffff, 0xc060000c,
+ 0x89, 0xc0000fff, 0x00000100,
+ 0x82a, 0xffffffff, 0x00000104,
+ 0x1579, 0xff000fff, 0x00000100,
+ 0xc33, 0xc0000fff, 0x00000104,
+ 0x3079, 0x00000001, 0x00000001,
+ 0x3403, 0xff000ff0, 0x00000100,
+ 0x3603, 0xff000ff0, 0x00000100
+};
+
+static const u32 hawaii_golden_spm_registers[] =
+{
+ 0xc200, 0xe0ffffff, 0xe0000000
+};
+
+static const u32 hawaii_golden_common_registers[] =
+{
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xa0d4, 0xffffffff, 0x3a00161a,
+ 0xa0d5, 0xffffffff, 0x0000002e,
+ 0x2684, 0xffffffff, 0x00018208,
+ 0x263e, 0xffffffff, 0x12011003
+};
+
+static const u32 hawaii_golden_registers[] =
+{
+ 0xcd5, 0x00000333, 0x00000333,
+ 0x2684, 0x00010000, 0x00058208,
+ 0x260c, 0xffffffff, 0x00000000,
+ 0x260d, 0xf00fffff, 0x00000400,
+ 0x260e, 0x0002021c, 0x00020200,
+ 0x31e, 0x00000080, 0x00000000,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0xd43, 0x00810000, 0x408af000,
+ 0x1c0c, 0x31000111, 0x00000011,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x848, 0x0000007f, 0x0000001b,
+ 0x877, 0x00007fb6, 0x00002191,
+ 0xd8a, 0x0000003f, 0x0000000a,
+ 0xd8b, 0x0000003f, 0x0000000a,
+ 0xab9, 0x00073ffe, 0x000022a2,
+ 0x903, 0x000007ff, 0x00000000,
+ 0x22fc, 0x00002001, 0x00000001,
+ 0x22c9, 0xffffffff, 0x00ffffff,
+ 0xc281, 0x0000ff0f, 0x00000000,
+ 0xa293, 0x07ffffff, 0x06000000,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x31da, 0x00000008, 0x00000008,
+ 0x31dc, 0x00000f00, 0x00000800,
+ 0x31dd, 0x00000f00, 0x00000800,
+ 0x31e6, 0x00ffffff, 0x00ff7fbf,
+ 0x31e7, 0x00ffffff, 0x00ff7faf,
+ 0x2300, 0x000000ff, 0x00000800,
+ 0x390, 0x00001fff, 0x00001fff,
+ 0x2418, 0x0000007f, 0x00000020,
+ 0x2542, 0x00010000, 0x00010000,
+ 0x2b80, 0x00100000, 0x000ff07c,
+ 0x2b05, 0x000003ff, 0x0000000f,
+ 0x2b04, 0xffffffff, 0x7564fdec,
+ 0x2b03, 0xffffffff, 0x3120b9a8,
+ 0x2b02, 0x20000000, 0x0f9c0000
+};
+
+static const u32 hawaii_mgcg_cgcg_init[] =
+{
+ 0x3108, 0xffffffff, 0xfffffffd,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf0a8, 0xffffffff, 0x00000100,
+ 0xf082, 0xffffffff, 0x00000100,
+ 0xf0b0, 0xffffffff, 0x00000100,
+ 0xf0b2, 0xffffffff, 0x00000100,
+ 0xf0b1, 0xffffffff, 0x00000100,
+ 0x1579, 0xffffffff, 0x00200100,
+ 0xf0a0, 0xffffffff, 0x00000100,
+ 0xf085, 0xffffffff, 0x06000100,
+ 0xf088, 0xffffffff, 0x00000100,
+ 0xf086, 0xffffffff, 0x06000100,
+ 0xf081, 0xffffffff, 0x00000100,
+ 0xf0b8, 0xffffffff, 0x00000100,
+ 0xf089, 0xffffffff, 0x00000100,
+ 0xf080, 0xffffffff, 0x00000100,
+ 0xf08c, 0xffffffff, 0x00000100,
+ 0xf08d, 0xffffffff, 0x00000100,
+ 0xf094, 0xffffffff, 0x00000100,
+ 0xf095, 0xffffffff, 0x00000100,
+ 0xf096, 0xffffffff, 0x00000100,
+ 0xf097, 0xffffffff, 0x00000100,
+ 0xf098, 0xffffffff, 0x00000100,
+ 0xf09f, 0xffffffff, 0x00000100,
+ 0xf09e, 0xffffffff, 0x00000100,
+ 0xf084, 0xffffffff, 0x06000100,
+ 0xf0a4, 0xffffffff, 0x00000100,
+ 0xf09d, 0xffffffff, 0x00000100,
+ 0xf0ad, 0xffffffff, 0x00000100,
+ 0xf0ac, 0xffffffff, 0x00000100,
+ 0xf09c, 0xffffffff, 0x00000100,
+ 0xc200, 0xffffffff, 0xe0000000,
+ 0xf008, 0xffffffff, 0x00010000,
+ 0xf009, 0xffffffff, 0x00030002,
+ 0xf00a, 0xffffffff, 0x00040007,
+ 0xf00b, 0xffffffff, 0x00060005,
+ 0xf00c, 0xffffffff, 0x00090008,
+ 0xf00d, 0xffffffff, 0x00010000,
+ 0xf00e, 0xffffffff, 0x00030002,
+ 0xf00f, 0xffffffff, 0x00040007,
+ 0xf010, 0xffffffff, 0x00060005,
+ 0xf011, 0xffffffff, 0x00090008,
+ 0xf012, 0xffffffff, 0x00010000,
+ 0xf013, 0xffffffff, 0x00030002,
+ 0xf014, 0xffffffff, 0x00040007,
+ 0xf015, 0xffffffff, 0x00060005,
+ 0xf016, 0xffffffff, 0x00090008,
+ 0xf017, 0xffffffff, 0x00010000,
+ 0xf018, 0xffffffff, 0x00030002,
+ 0xf019, 0xffffffff, 0x00040007,
+ 0xf01a, 0xffffffff, 0x00060005,
+ 0xf01b, 0xffffffff, 0x00090008,
+ 0xf01c, 0xffffffff, 0x00010000,
+ 0xf01d, 0xffffffff, 0x00030002,
+ 0xf01e, 0xffffffff, 0x00040007,
+ 0xf01f, 0xffffffff, 0x00060005,
+ 0xf020, 0xffffffff, 0x00090008,
+ 0xf021, 0xffffffff, 0x00010000,
+ 0xf022, 0xffffffff, 0x00030002,
+ 0xf023, 0xffffffff, 0x00040007,
+ 0xf024, 0xffffffff, 0x00060005,
+ 0xf025, 0xffffffff, 0x00090008,
+ 0xf026, 0xffffffff, 0x00010000,
+ 0xf027, 0xffffffff, 0x00030002,
+ 0xf028, 0xffffffff, 0x00040007,
+ 0xf029, 0xffffffff, 0x00060005,
+ 0xf02a, 0xffffffff, 0x00090008,
+ 0xf02b, 0xffffffff, 0x00010000,
+ 0xf02c, 0xffffffff, 0x00030002,
+ 0xf02d, 0xffffffff, 0x00040007,
+ 0xf02e, 0xffffffff, 0x00060005,
+ 0xf02f, 0xffffffff, 0x00090008,
+ 0xf030, 0xffffffff, 0x00010000,
+ 0xf031, 0xffffffff, 0x00030002,
+ 0xf032, 0xffffffff, 0x00040007,
+ 0xf033, 0xffffffff, 0x00060005,
+ 0xf034, 0xffffffff, 0x00090008,
+ 0xf035, 0xffffffff, 0x00010000,
+ 0xf036, 0xffffffff, 0x00030002,
+ 0xf037, 0xffffffff, 0x00040007,
+ 0xf038, 0xffffffff, 0x00060005,
+ 0xf039, 0xffffffff, 0x00090008,
+ 0xf03a, 0xffffffff, 0x00010000,
+ 0xf03b, 0xffffffff, 0x00030002,
+ 0xf03c, 0xffffffff, 0x00040007,
+ 0xf03d, 0xffffffff, 0x00060005,
+ 0xf03e, 0xffffffff, 0x00090008,
+ 0x30c6, 0xffffffff, 0x00020200,
+ 0xcd4, 0xffffffff, 0x00000200,
+ 0x570, 0xffffffff, 0x00000400,
+ 0x157a, 0xffffffff, 0x00000000,
+ 0xbd4, 0xffffffff, 0x00000902,
+ 0xf000, 0xffffffff, 0x96940200,
+ 0x21c2, 0xffffffff, 0x00900100,
+ 0x3109, 0xffffffff, 0x0020003f,
+ 0xe, 0xffffffff, 0x0140001c,
+ 0xf, 0x000f0000, 0x000f0000,
+ 0x88, 0xffffffff, 0xc060000c,
+ 0x89, 0xc0000fff, 0x00000100,
+ 0x3e4, 0xffffffff, 0x00000100,
+ 0x3e6, 0x00000101, 0x00000000,
+ 0x82a, 0xffffffff, 0x00000104,
+ 0x1579, 0xff000fff, 0x00000100,
+ 0xc33, 0xc0000fff, 0x00000104,
+ 0x3079, 0x00000001, 0x00000001,
+ 0x3403, 0xff000ff0, 0x00000100,
+ 0x3603, 0xff000ff0, 0x00000100
+};
+
+static const u32 godavari_golden_registers[] =
+{
+ 0x1579, 0xff607fff, 0xfc000100,
+ 0x1bb6, 0x00010101, 0x00010000,
+ 0x260c, 0xffffffff, 0x00000000,
+ 0x260c0, 0xf00fffff, 0x00000400,
+ 0x184c, 0xffffffff, 0x00010000,
+ 0x16ec, 0x000000f0, 0x00000070,
+ 0x16f0, 0xf0311fff, 0x80300000,
+ 0x263e, 0x73773777, 0x12010001,
+ 0x263f, 0xffffffff, 0x00000010,
+ 0x200c, 0x00001f0f, 0x0000100a,
+ 0xbd2, 0x73773777, 0x12010001,
+ 0x902, 0x000fffff, 0x000c007f,
+ 0x2285, 0xf000003f, 0x00000007,
+ 0x22c9, 0xffffffff, 0x00ff0fff,
+ 0xc281, 0x0000ff0f, 0x00000000,
+ 0xa293, 0x07ffffff, 0x06000000,
+ 0x136, 0x00000fff, 0x00000100,
+ 0x3405, 0x00010000, 0x00810001,
+ 0x3605, 0x00010000, 0x00810001,
+ 0xf9e, 0x00000001, 0x00000002,
+ 0x31da, 0x00000008, 0x00000008,
+ 0x31dc, 0x00000f00, 0x00000800,
+ 0x31dd, 0x00000f00, 0x00000800,
+ 0x31e6, 0x00ffffff, 0x00ff7fbf,
+ 0x31e7, 0x00ffffff, 0x00ff7faf,
+ 0x2300, 0x000000ff, 0x00000001,
+ 0x853e, 0x01ff01ff, 0x00000002,
+ 0x8526, 0x007ff800, 0x00200000,
+ 0x8057, 0xffffffff, 0x00000f40,
+ 0x2231, 0x001f3ae3, 0x00000082,
+ 0x2235, 0x0000001f, 0x00000010,
+ 0xc24d, 0xffffffff, 0x00000000
+};
+
+static void cik_init_golden_registers(struct amdgpu_device *adev)
+{
+ /* Some of the registers might be dependent on GRBM_GFX_INDEX */
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ amdgpu_device_program_register_sequence(adev,
+ bonaire_mgcg_cgcg_init,
+ ARRAY_SIZE(bonaire_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ bonaire_golden_registers,
+ ARRAY_SIZE(bonaire_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ bonaire_golden_common_registers,
+ ARRAY_SIZE(bonaire_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ bonaire_golden_spm_registers,
+ ARRAY_SIZE(bonaire_golden_spm_registers));
+ break;
+ case CHIP_KABINI:
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_mgcg_cgcg_init,
+ ARRAY_SIZE(kalindi_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_registers,
+ ARRAY_SIZE(kalindi_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_common_registers,
+ ARRAY_SIZE(kalindi_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_spm_registers,
+ ARRAY_SIZE(kalindi_golden_spm_registers));
+ break;
+ case CHIP_MULLINS:
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_mgcg_cgcg_init,
+ ARRAY_SIZE(kalindi_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ godavari_golden_registers,
+ ARRAY_SIZE(godavari_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_common_registers,
+ ARRAY_SIZE(kalindi_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ kalindi_golden_spm_registers,
+ ARRAY_SIZE(kalindi_golden_spm_registers));
+ break;
+ case CHIP_KAVERI:
+ amdgpu_device_program_register_sequence(adev,
+ spectre_mgcg_cgcg_init,
+ ARRAY_SIZE(spectre_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ spectre_golden_registers,
+ ARRAY_SIZE(spectre_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ spectre_golden_common_registers,
+ ARRAY_SIZE(spectre_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ spectre_golden_spm_registers,
+ ARRAY_SIZE(spectre_golden_spm_registers));
+ break;
+ case CHIP_HAWAII:
+ amdgpu_device_program_register_sequence(adev,
+ hawaii_mgcg_cgcg_init,
+ ARRAY_SIZE(hawaii_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ hawaii_golden_registers,
+ ARRAY_SIZE(hawaii_golden_registers));
+ amdgpu_device_program_register_sequence(adev,
+ hawaii_golden_common_registers,
+ ARRAY_SIZE(hawaii_golden_common_registers));
+ amdgpu_device_program_register_sequence(adev,
+ hawaii_golden_spm_registers,
+ ARRAY_SIZE(hawaii_golden_spm_registers));
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+/**
+ * cik_get_xclk - get the xclk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns the reference clock used by the gfx engine
+ * (CIK).
+ */
+static u32 cik_get_xclk(struct amdgpu_device *adev)
+{
+ u32 reference_clock = adev->clock.spll.reference_freq;
+
+ if (adev->flags & AMD_IS_APU) {
+ if (RREG32_SMC(ixGENERAL_PWRMGT) & GENERAL_PWRMGT__GPU_COUNTER_CLK_MASK)
+ return reference_clock / 2;
+ } else {
+ if (RREG32_SMC(ixCG_CLKPIN_CNTL) & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK)
+ return reference_clock / 4;
+ }
+ return reference_clock;
+}
+
+/**
+ * cik_srbm_select - select specific register instances
+ *
+ * @adev: amdgpu_device pointer
+ * @me: selected ME (micro engine)
+ * @pipe: pipe
+ * @queue: queue
+ * @vmid: VMID
+ *
+ * Switches the currently active register instances. Some
+ * registers are instanced per VMID, others are instanced per
+ * me/pipe/queue combination.
+ */
+void cik_srbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 srbm_gfx_cntl =
+ (((pipe << SRBM_GFX_CNTL__PIPEID__SHIFT) & SRBM_GFX_CNTL__PIPEID_MASK)|
+ ((me << SRBM_GFX_CNTL__MEID__SHIFT) & SRBM_GFX_CNTL__MEID_MASK)|
+ ((vmid << SRBM_GFX_CNTL__VMID__SHIFT) & SRBM_GFX_CNTL__VMID_MASK)|
+ ((queue << SRBM_GFX_CNTL__QUEUEID__SHIFT) & SRBM_GFX_CNTL__QUEUEID_MASK));
+ WREG32(mmSRBM_GFX_CNTL, srbm_gfx_cntl);
+}
+
+static void cik_vga_set_state(struct amdgpu_device *adev, bool state)
+{
+ uint32_t tmp;
+
+ tmp = RREG32(mmCONFIG_CNTL);
+ if (!state)
+ tmp |= CONFIG_CNTL__VGA_DIS_MASK;
+ else
+ tmp &= ~CONFIG_CNTL__VGA_DIS_MASK;
+ WREG32(mmCONFIG_CNTL, tmp);
+}
+
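+/*
+ * Read the VBIOS with the ROM path enabled and VGA decode turned off,
+ * restoring every touched register afterwards.
+ */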
+static bool cik_read_disabled_bios(struct amdgpu_device *adev)
+{
+ u32 bus_cntl;
+ u32 d1vga_control = 0;
+ u32 d2vga_control = 0;
+ u32 vga_render_control = 0;
+ u32 rom_cntl;
+ bool r;
+
+ bus_cntl = RREG32(mmBUS_CNTL);
+ if (adev->mode_info.num_crtc) {
+ d1vga_control = RREG32(mmD1VGA_CONTROL);
+ d2vga_control = RREG32(mmD2VGA_CONTROL);
+ vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
+ }
+ rom_cntl = RREG32_SMC(ixROM_CNTL);
+
+ /* enable the rom */
+ WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK));
+ if (adev->mode_info.num_crtc) {
+ /* Disable VGA mode */
+ WREG32(mmD1VGA_CONTROL,
+ (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmD2VGA_CONTROL,
+ (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmVGA_RENDER_CONTROL,
+ (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK));
+ }
+ WREG32_SMC(ixROM_CNTL, rom_cntl | ROM_CNTL__SCK_OVERWRITE_MASK);
+
+ r = amdgpu_read_bios(adev);
+
+ /* restore regs */
+ WREG32(mmBUS_CNTL, bus_cntl);
+ if (adev->mode_info.num_crtc) {
+ WREG32(mmD1VGA_CONTROL, d1vga_control);
+ WREG32(mmD2VGA_CONTROL, d2vga_control);
+ WREG32(mmVGA_RENDER_CONTROL, vga_render_control);
+ }
+ WREG32_SMC(ixROM_CNTL, rom_cntl);
+ return r;
+}
+
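+/*
+ * Read the VBIOS image dword by dword through the SMC ROM_INDEX/ROM_DATA
+ * indirect pair; not applicable to APUs, whose image lives in the system BIOS.
+ */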
+static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ u32 *dw_ptr;
+ unsigned long flags;
+ u32 i, length_dw;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+ /* take the smc lock since we are using the smc index */
+ spin_lock_irqsave(&adev->smc_idx_lock, flags);
+ /* set rom index to 0 */
+ WREG32(mmSMC_IND_INDEX_0, ixROM_INDEX);
+ WREG32(mmSMC_IND_DATA_0, 0);
+ /* set index to data for continuous read */
+ WREG32(mmSMC_IND_INDEX_0, ixROM_DATA);
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(mmSMC_IND_DATA_0);
+ spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
+
+ return true;
+}
+
+static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
+ {mmGRBM_STATUS},
+ {mmGRBM_STATUS2},
+ {mmGRBM_STATUS_SE0},
+ {mmGRBM_STATUS_SE1},
+ {mmGRBM_STATUS_SE2},
+ {mmGRBM_STATUS_SE3},
+ {mmSRBM_STATUS},
+ {mmSRBM_STATUS2},
+ {mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+ {mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+ {mmCP_STAT},
+ {mmCP_STALLED_STAT1},
+ {mmCP_STALLED_STAT2},
+ {mmCP_STALLED_STAT3},
+ {mmCP_CPF_BUSY_STAT},
+ {mmCP_CPF_STALLED_STAT1},
+ {mmCP_CPF_STATUS},
+ {mmCP_CPC_BUSY_STAT},
+ {mmCP_CPC_STALLED_STAT1},
+ {mmCP_CPC_STATUS},
+ {mmGB_ADDR_CONFIG},
+ {mmMC_ARB_RAMCFG},
+ {mmGB_TILE_MODE0},
+ {mmGB_TILE_MODE1},
+ {mmGB_TILE_MODE2},
+ {mmGB_TILE_MODE3},
+ {mmGB_TILE_MODE4},
+ {mmGB_TILE_MODE5},
+ {mmGB_TILE_MODE6},
+ {mmGB_TILE_MODE7},
+ {mmGB_TILE_MODE8},
+ {mmGB_TILE_MODE9},
+ {mmGB_TILE_MODE10},
+ {mmGB_TILE_MODE11},
+ {mmGB_TILE_MODE12},
+ {mmGB_TILE_MODE13},
+ {mmGB_TILE_MODE14},
+ {mmGB_TILE_MODE15},
+ {mmGB_TILE_MODE16},
+ {mmGB_TILE_MODE17},
+ {mmGB_TILE_MODE18},
+ {mmGB_TILE_MODE19},
+ {mmGB_TILE_MODE20},
+ {mmGB_TILE_MODE21},
+ {mmGB_TILE_MODE22},
+ {mmGB_TILE_MODE23},
+ {mmGB_TILE_MODE24},
+ {mmGB_TILE_MODE25},
+ {mmGB_TILE_MODE26},
+ {mmGB_TILE_MODE27},
+ {mmGB_TILE_MODE28},
+ {mmGB_TILE_MODE29},
+ {mmGB_TILE_MODE30},
+ {mmGB_TILE_MODE31},
+ {mmGB_MACROTILE_MODE0},
+ {mmGB_MACROTILE_MODE1},
+ {mmGB_MACROTILE_MODE2},
+ {mmGB_MACROTILE_MODE3},
+ {mmGB_MACROTILE_MODE4},
+ {mmGB_MACROTILE_MODE5},
+ {mmGB_MACROTILE_MODE6},
+ {mmGB_MACROTILE_MODE7},
+ {mmGB_MACROTILE_MODE8},
+ {mmGB_MACROTILE_MODE9},
+ {mmGB_MACROTILE_MODE10},
+ {mmGB_MACROTILE_MODE11},
+ {mmGB_MACROTILE_MODE12},
+ {mmGB_MACROTILE_MODE13},
+ {mmGB_MACROTILE_MODE14},
+ {mmGB_MACROTILE_MODE15},
+ {mmCC_RB_BACKEND_DISABLE, true},
+ {mmGC_USER_RB_BACKEND_DISABLE, true},
+ {mmGB_BACKEND_MAP, false},
+ {mmPA_SC_RASTER_CONFIG, true},
+ {mmPA_SC_RASTER_CONFIG_1, true},
+};
+
+
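+/*
+ * Return the value of one of the allowed registers. GRBM-indexed registers
+ * are read under grbm_idx_mutex with the requested se/sh selected, while
+ * the tiling and config registers are served from the values cached in
+ * adev->gfx.config instead of touching the hardware.
+ */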
+static uint32_t cik_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ uint32_t val;
+ unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num;
+ unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num;
+
+ switch (reg_offset) {
+ case mmCC_RB_BACKEND_DISABLE:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable;
+ case mmGC_USER_RB_BACKEND_DISABLE:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable;
+ case mmPA_SC_RASTER_CONFIG:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config;
+ case mmPA_SC_RASTER_CONFIG_1:
+ return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config_1;
+ }
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+ } else {
+ unsigned idx;
+
+ switch (reg_offset) {
+ case mmGB_ADDR_CONFIG:
+ return adev->gfx.config.gb_addr_config;
+ case mmMC_ARB_RAMCFG:
+ return adev->gfx.config.mc_arb_ramcfg;
+ case mmGB_TILE_MODE0:
+ case mmGB_TILE_MODE1:
+ case mmGB_TILE_MODE2:
+ case mmGB_TILE_MODE3:
+ case mmGB_TILE_MODE4:
+ case mmGB_TILE_MODE5:
+ case mmGB_TILE_MODE6:
+ case mmGB_TILE_MODE7:
+ case mmGB_TILE_MODE8:
+ case mmGB_TILE_MODE9:
+ case mmGB_TILE_MODE10:
+ case mmGB_TILE_MODE11:
+ case mmGB_TILE_MODE12:
+ case mmGB_TILE_MODE13:
+ case mmGB_TILE_MODE14:
+ case mmGB_TILE_MODE15:
+ case mmGB_TILE_MODE16:
+ case mmGB_TILE_MODE17:
+ case mmGB_TILE_MODE18:
+ case mmGB_TILE_MODE19:
+ case mmGB_TILE_MODE20:
+ case mmGB_TILE_MODE21:
+ case mmGB_TILE_MODE22:
+ case mmGB_TILE_MODE23:
+ case mmGB_TILE_MODE24:
+ case mmGB_TILE_MODE25:
+ case mmGB_TILE_MODE26:
+ case mmGB_TILE_MODE27:
+ case mmGB_TILE_MODE28:
+ case mmGB_TILE_MODE29:
+ case mmGB_TILE_MODE30:
+ case mmGB_TILE_MODE31:
+ idx = (reg_offset - mmGB_TILE_MODE0);
+ return adev->gfx.config.tile_mode_array[idx];
+ case mmGB_MACROTILE_MODE0:
+ case mmGB_MACROTILE_MODE1:
+ case mmGB_MACROTILE_MODE2:
+ case mmGB_MACROTILE_MODE3:
+ case mmGB_MACROTILE_MODE4:
+ case mmGB_MACROTILE_MODE5:
+ case mmGB_MACROTILE_MODE6:
+ case mmGB_MACROTILE_MODE7:
+ case mmGB_MACROTILE_MODE8:
+ case mmGB_MACROTILE_MODE9:
+ case mmGB_MACROTILE_MODE10:
+ case mmGB_MACROTILE_MODE11:
+ case mmGB_MACROTILE_MODE12:
+ case mmGB_MACROTILE_MODE13:
+ case mmGB_MACROTILE_MODE14:
+ case mmGB_MACROTILE_MODE15:
+ idx = (reg_offset - mmGB_MACROTILE_MODE0);
+ return adev->gfx.config.macrotile_mode_array[idx];
+ default:
+ return RREG32(reg_offset);
+ }
+ }
+}
+
+static int cik_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(cik_allowed_read_registers); i++) {
+ bool indexed = cik_allowed_read_registers[i].grbm_indexed;
+
+ if (reg_offset != cik_allowed_read_registers[i].reg_offset)
+ continue;
+
+ *value = cik_get_register_value(adev, indexed, se_num, sh_num,
+ reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+struct kv_reset_save_regs {
+ u32 gmcon_reng_execute;
+ u32 gmcon_misc;
+ u32 gmcon_misc3;
+};
+
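+/*
+ * APU reset helpers: save the GMCON registers and disable automatic
+ * remap-engine execution before the PCI config reset; the restore path
+ * below reprograms the PGFSM and puts the saved registers back.
+ */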
+static void kv_save_regs_for_reset(struct amdgpu_device *adev,
+ struct kv_reset_save_regs *save)
+{
+ save->gmcon_reng_execute = RREG32(mmGMCON_RENG_EXECUTE);
+ save->gmcon_misc = RREG32(mmGMCON_MISC);
+ save->gmcon_misc3 = RREG32(mmGMCON_MISC3);
+
+ WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute &
+ ~GMCON_RENG_EXECUTE__RENG_EXECUTE_ON_PWR_UP_MASK);
+ WREG32(mmGMCON_MISC, save->gmcon_misc &
+ ~(GMCON_MISC__RENG_EXECUTE_ON_REG_UPDATE_MASK |
+ GMCON_MISC__STCTRL_STUTTER_EN_MASK));
+}
+
+static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
+ struct kv_reset_save_regs *save)
+{
+ int i;
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x200010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x300010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x210000);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xa00010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x21003);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xb00010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x2b00);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xc00010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xd00010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x420000);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x100010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x120202);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x500010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x3e3e36);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x600010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x373f3e);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0x700010ff);
+
+ for (i = 0; i < 5; i++)
+ WREG32(mmGMCON_PGFSM_WRITE, 0);
+
+ WREG32(mmGMCON_PGFSM_WRITE, 0x3e1332);
+ WREG32(mmGMCON_PGFSM_CONFIG, 0xe00010ff);
+
+ WREG32(mmGMCON_MISC3, save->gmcon_misc3);
+ WREG32(mmGMCON_MISC, save->gmcon_misc);
+ WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute);
+}
+
+/**
+ * cik_asic_pci_config_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use PCI Config method to reset the GPU.
+ *
+ * Returns 0 for success.
+ */
+static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
+{
+ struct kv_reset_save_regs kv_save = { 0 };
+ u32 i;
+ int r = -EINVAL;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ if (adev->flags & AMD_IS_APU)
+ kv_save_regs_for_reset(adev, &kv_save);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ /* reset */
+ amdgpu_device_pci_config_reset(adev);
+
+ udelay(100);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+ /* enable BM */
+ pci_set_master(adev->pdev);
+ adev->has_hw_reset = true;
+ r = 0;
+ break;
+ }
+ udelay(1);
+ }
+
+ /* does asic init need to be run first??? */
+ if (adev->flags & AMD_IS_APU)
+ kv_restore_regs_for_reset(adev, &kv_save);
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return r;
+}
+
+static int cik_asic_supports_baco(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ return amdgpu_dpm_is_baco_supported(adev);
+ default:
+ return 0;
+ }
+}
+
+static enum amd_reset_method
+cik_asic_reset_method(struct amdgpu_device *adev)
+{
+ bool baco_reset;
+
+ if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ baco_reset = cik_asic_supports_baco(adev);
+ break;
+ default:
+ baco_reset = false;
+ break;
+ }
+
+ if (baco_reset)
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_LEGACY;
+}
+
+/**
+ * cik_asic_reset - soft reset GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up which blocks are hung and attempt
+ * to reset them.
+ * Returns 0 for success.
+ */
+static int cik_asic_reset(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* APUs don't have full asic reset */
+ if (adev->flags & AMD_IS_APU)
+ return 0;
+
+ if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ dev_info(adev->dev, "BACO reset\n");
+ r = amdgpu_dpm_baco_reset(adev);
+ } else {
+ dev_info(adev->dev, "PCI CONFIG reset\n");
+ r = cik_asic_pci_config_reset(adev);
+ }
+
+ return r;
+}
+
+static u32 cik_get_config_memsize(struct amdgpu_device *adev)
+{
+ return RREG32(mmCONFIG_MEMSIZE);
+}
+
+static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
+ u32 cntl_reg, u32 status_reg)
+{
+ int r, i;
+ struct atom_clock_dividers dividers;
+ uint32_t tmp;
+
+ r = amdgpu_atombios_get_clock_dividers(adev,
+ COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+ clock, false, &dividers);
+ if (r)
+ return r;
+
+ tmp = RREG32_SMC(cntl_reg);
+ tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
+ CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
+ tmp |= dividers.post_divider;
+ WREG32_SMC(cntl_reg, tmp);
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int cik_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ int r = 0;
+
+ r = cik_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
+ if (r)
+ return r;
+
+ r = cik_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
+ return r;
+}
+
+static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ int r, i;
+ struct atom_clock_dividers dividers;
+ u32 tmp;
+
+ r = amdgpu_atombios_get_clock_dividers(adev,
+ COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+ ecclk, false, &dividers);
+ if (r)
+ return r;
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ tmp = RREG32_SMC(ixCG_ECLK_CNTL);
+ tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK |
+ CG_ECLK_CNTL__ECLK_DIVIDER_MASK);
+ tmp |= dividers.post_divider;
+ WREG32_SMC(ixCG_ECLK_CNTL, tmp);
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
+{
+ struct pci_dev *root = adev->pdev->bus->self;
+ u32 speed_cntl, current_data_rate;
+ int i;
+ u16 tmp16;
+
+ if (pci_is_root_bus(adev->pdev->bus))
+ return;
+
+ if (amdgpu_pcie_gen2 == 0)
+ return;
+
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
+ return;
+
+ speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >>
+ PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
+ if (current_data_rate == 2) {
+ DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
+ if (current_data_rate == 1) {
+ DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+ return;
+ }
+ DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
+ }
+
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
+ return;
+
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
+ /* re-try equalization if gen3 is not already enabled */
+ if (current_data_rate != 2) {
+ u16 bridge_cfg, gpu_cfg;
+ u16 bridge_cfg2, gpu_cfg2;
+ u32 max_lw, current_lw, tmp;
+
+ pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
+
+ tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
+ PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT;
+ current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK)
+ >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT;
+
+ if (current_lw < max_lw) {
+ tmp = RREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) {
+ tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK);
+ tmp |= (max_lw <<
+ PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT);
+ tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK;
+ WREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL, tmp);
+ }
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* check status */
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
+ if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+ break;
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
+
+ tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE(ixPCIE_LC_CNTL4, tmp);
+
+ tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK;
+ WREG32_PCIE(ixPCIE_LC_CNTL4, tmp);
+
+ msleep(100);
+
+ /* linkctl */
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ bridge_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_HAWD,
+ gpu_cfg &
+ PCI_EXP_LNKCTL_HAWD);
+
+ /* linkctl2 */
+ pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN,
+ gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+
+ tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
+ tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE(ixPCIE_LC_CNTL4, tmp);
+ }
+ }
+ }
+
+ /* set the link speed */
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK |
+ PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK;
+ speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
+ WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
+
+ tmp16 = 0;
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
+ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
+ else
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS, tmp16);
+
+ speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
+ WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0)
+ break;
+ udelay(1);
+ }
+}
+
+static void cik_program_aspm(struct amdgpu_device *adev)
+{
+ u32 data, orig;
+ bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+ bool disable_clkreq = false;
+
+ if (!amdgpu_device_should_use_aspm(adev))
+ return;
+
+ if (pci_is_root_bus(adev->pdev->bus))
+ return;
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
+ data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
+ data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) |
+ PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_N_FTS_CNTL, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL3, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_P_CNTL);
+ data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_P_CNTL, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
+ data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK |
+ PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK);
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (!disable_l0s)
+ data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT);
+
+ if (!disable_l1) {
+ data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT);
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+
+ if (!disable_plloff_in_l1) {
+ bool clk_req_support;
+
+ orig = data = RREG32_PCIE(ixPB0_PIF_PWRDOWN_0);
+ data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK |
+ PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) |
+ (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPB0_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PCIE(ixPB0_PIF_PWRDOWN_1);
+ data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK |
+ PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) |
+ (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPB0_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PCIE(ixPB1_PIF_PWRDOWN_0);
+ data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK |
+ PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) |
+ (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPB1_PIF_PWRDOWN_0, data);
+
+ orig = data = RREG32_PCIE(ixPB1_PIF_PWRDOWN_1);
+ data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK |
+ PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) |
+ (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPB1_PIF_PWRDOWN_1, data);
+
+ orig = data = RREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK;
+ data |= ~(3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT);
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL, data);
+
+ if (!disable_clkreq) {
+ struct pci_dev *root = adev->pdev->bus->self;
+ u32 lnkcap;
+
+ clk_req_support = false;
+ pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+ if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+ clk_req_support = true;
+ } else {
+ clk_req_support = false;
+ }
+
+ if (clk_req_support) {
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL2);
+ data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL2, data);
+
+ orig = data = RREG32_SMC(ixTHM_CLK_CNTL);
+ data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK |
+ THM_CLK_CNTL__TMON_CLK_SEL_MASK);
+ data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) |
+ (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixTHM_CLK_CNTL, data);
+
+ orig = data = RREG32_SMC(ixMISC_CLK_CTRL);
+ data &= ~(MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL_MASK |
+ MISC_CLK_CTRL__ZCLK_SEL_MASK);
+ data |= (1 << MISC_CLK_CTRL__DEEP_SLEEP_CLK_SEL__SHIFT) |
+ (1 << MISC_CLK_CTRL__ZCLK_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixMISC_CLK_CTRL, data);
+
+ orig = data = RREG32_SMC(ixCG_CLKPIN_CNTL);
+ data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK;
+ if (orig != data)
+ WREG32_SMC(ixCG_CLKPIN_CNTL, data);
+
+ orig = data = RREG32_SMC(ixCG_CLKPIN_CNTL_2);
+ data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK;
+ if (orig != data)
+ WREG32_SMC(ixCG_CLKPIN_CNTL_2, data);
+
+ orig = data = RREG32_SMC(ixMPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK;
+ data |= (4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT);
+ if (orig != data)
+ WREG32_SMC(ixMPLL_BYPASSCLK_SEL, data);
+ }
+ }
+ } else {
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+ }
+
+ orig = data = RREG32_PCIE(ixPCIE_CNTL2);
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_CNTL2, data);
+
+ if (!disable_l0s) {
+ data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL);
+ if ((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) ==
+ PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) {
+ data = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) &&
+ (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) {
+ orig = data = RREG32_PCIE(ixPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ if (orig != data)
+ WREG32_PCIE(ixPCIE_LC_CNTL, data);
+ }
+ }
+ }
+}
+
+static uint32_t cik_get_rev_id(struct amdgpu_device *adev)
+{
+ return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
+ >> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
+}
+
+static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ }
+}
+
+static void cik_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_DEBUG0, 1);
+ RREG32(mmHDP_DEBUG0);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+ }
+}
+
+static bool cik_need_full_reset(struct amdgpu_device *adev)
+{
+ /* change this when we support soft reset */
+ return true;
+}
+
+static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
+
+static bool cik_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 clock_cntl, pc;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* check if the SMC is already running */
+ clock_cntl = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
+ pc = RREG32_SMC(ixSMC_PC_C);
+ if ((REG_GET_FIELD(clock_cntl, SMC_SYSCON_CLOCK_CNTL_0, ck_disable) == 0) &&
+ (pc >= 0x20100))
+ return true;
+
+ return false;
+}
+
+static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e. the number of replays */
+ return (nak_r + nak_g);
+}
+
+static void cik_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static const struct amdgpu_asic_funcs cik_asic_funcs =
+{
+ .read_disabled_bios = &cik_read_disabled_bios,
+ .read_bios_from_rom = &cik_read_bios_from_rom,
+ .read_register = &cik_read_register,
+ .reset = &cik_asic_reset,
+ .reset_method = &cik_asic_reset_method,
+ .set_vga_state = &cik_vga_set_state,
+ .get_xclk = &cik_get_xclk,
+ .set_uvd_clocks = &cik_set_uvd_clocks,
+ .set_vce_clocks = &cik_set_vce_clocks,
+ .get_config_memsize = &cik_get_config_memsize,
+ .flush_hdp = &cik_flush_hdp,
+ .invalidate_hdp = &cik_invalidate_hdp,
+ .need_full_reset = &cik_need_full_reset,
+ .init_doorbell_index = &legacy_doorbell_index_init,
+ .get_pcie_usage = &cik_get_pcie_usage,
+ .need_reset_on_init = &cik_need_reset_on_init,
+ .get_pcie_replay_count = &cik_get_pcie_replay_count,
+ .supports_baco = &cik_asic_supports_baco,
+ .pre_asic_init = &cik_pre_asic_init,
+ .query_video_codecs = &cik_query_video_codecs,
+};
+
+static int cik_common_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->smc_rreg = &cik_smc_rreg;
+ adev->smc_wreg = &cik_smc_wreg;
+ adev->pcie_rreg = &cik_pcie_rreg;
+ adev->pcie_wreg = &cik_pcie_wreg;
+ adev->uvd_ctx_rreg = &cik_uvd_ctx_rreg;
+ adev->uvd_ctx_wreg = &cik_uvd_ctx_wreg;
+ adev->didt_rreg = &cik_didt_rreg;
+ adev->didt_wreg = &cik_didt_wreg;
+
+ adev->asic_funcs = &cik_asic_funcs;
+
+ adev->rev_id = cik_get_rev_id(adev);
+ adev->external_rev_id = 0xFF;
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x14;
+ break;
+ case CHIP_HAWAII:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = 0x28;
+ break;
+ case CHIP_KAVERI:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags =
+ /*AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |*/
+ AMD_PG_SUPPORT_UVD |
+ AMD_PG_SUPPORT_VCE |
+ /* AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_ACP |
+ AMD_PG_SUPPORT_SAMU |*/
+ 0;
+ if (adev->pdev->device == 0x1312 ||
+ adev->pdev->device == 0x1316 ||
+ adev->pdev->device == 0x1317)
+ adev->external_rev_id = 0x41;
+ else
+ adev->external_rev_id = 0x1;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
+ adev->pg_flags =
+ /*AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG | */
+ AMD_PG_SUPPORT_UVD |
+ /*AMD_PG_SUPPORT_VCE |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_SAMU |*/
+ 0;
+ if (adev->asic_type == CHIP_KABINI) {
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x81;
+ else if (adev->rev_id == 1)
+ adev->external_rev_id = 0x82;
+ else if (adev->rev_id == 2)
+ adev->external_rev_id = 0x85;
+ } else
+ adev->external_rev_id = adev->rev_id + 0xa1;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cik_common_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* move the golden regs per IP block */
+ cik_init_golden_registers(adev);
+ /* enable pcie gen2/3 link */
+ cik_pcie_gen3_enable(adev);
+ /* enable aspm */
+ cik_program_aspm(adev);
+
+ return 0;
+}
+
+static int cik_common_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ return 0;
+}
+
+static int cik_common_resume(struct amdgpu_ip_block *ip_block)
+{
+ return cik_common_hw_init(ip_block);
+}
+
+static bool cik_common_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+
+
+static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ /* XXX hard reset?? */
+ return 0;
+}
+
+static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int cik_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs cik_common_ip_funcs = {
+ .name = "cik_common",
+ .early_init = cik_common_early_init,
+ .hw_init = cik_common_hw_init,
+ .hw_fini = cik_common_hw_fini,
+ .resume = cik_common_resume,
+ .is_idle = cik_common_is_idle,
+ .soft_reset = cik_common_soft_reset,
+ .set_clockgating_state = cik_common_set_clockgating_state,
+ .set_powergating_state = cik_common_set_powergating_state,
+};
+
+static const struct amdgpu_ip_block_version cik_common_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &cik_common_ip_funcs,
+};
+
+int cik_set_ip_blocks(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
+ break;
+ case CHIP_HAWAII:
+ amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
+ break;
+ case CHIP_KAVERI:
+ amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+ amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block);
+
+ amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+ amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
+ if (adev->enable_virtual_display)
+ amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#endif
+ else
+ amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h
new file mode 100644
index 000000000000..f91ab4c246b7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __CIK_H__
+#define __CIK_H__
+
+#define CIK_FLUSH_GPU_TLB_NUM_WREG 3
+
+void cik_srbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+int cik_set_ip_blocks(struct amdgpu_device *adev);
+
+void legacy_doorbell_index_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
new file mode 100644
index 000000000000..876a3256dba4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "cikd.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+/*
+ * Interrupts
+ * Starting with r6xx, interrupts are handled via a ring buffer.
+ * Ring buffers are areas of GPU accessible memory that the GPU
+ * writes interrupt vectors into and the host reads vectors out of.
+ * There is a rptr (read pointer) that determines where the
+ * host is currently reading, and a wptr (write pointer)
+ * which determines where the GPU has written. When the
+ * pointers are equal, the ring is idle. When the GPU
+ * writes vectors to the ring buffer, it increments the
+ * wptr. When there is an interrupt, the host then starts
+ * fetching commands and processing them until the pointers are
+ * equal again at which point it updates the rptr.
+ */
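+
+/*
+ * Roughly, the host-side flow described above looks like the sketch below.
+ * This is illustrative only (the real loop lives in the core IH/IRQ code,
+ * which calls the cik_ih_* hooks defined in this file):
+ *
+ *   wptr = cik_ih_get_wptr(adev, ih);
+ *   while (ih->rptr != wptr) {
+ *           cik_ih_decode_iv(adev, ih, &entry);   advances ih->rptr by 16 bytes
+ *           dispatch entry.src_id to the registered handler
+ *   }
+ *   cik_ih_set_rptr(adev, ih);                    tell the hardware how far we read
+ */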
+
+static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * cik_ih_enable_interrupts - Enable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable the interrupt ring buffer (CIK).
+ */
+static void cik_ih_enable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+
+ ih_cntl |= IH_CNTL__ENABLE_INTR_MASK;
+ ih_rb_cntl |= IH_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(mmIH_CNTL, ih_cntl);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ adev->irq.ih.enabled = true;
+}
+
+/**
+ * cik_ih_disable_interrupts - Disable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the interrupt ring buffer (CIK).
+ */
+static void cik_ih_disable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+
+ ih_rb_cntl &= ~IH_RB_CNTL__RB_ENABLE_MASK;
+ ih_cntl &= ~IH_CNTL__ENABLE_INTR_MASK;
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ WREG32(mmIH_CNTL, ih_cntl);
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+ adev->irq.ih.enabled = false;
+ adev->irq.ih.rptr = 0;
+}
+
+/**
+ * cik_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, set up the IH
+ * ring buffer and enable it (CIK).
+ * Called at device load and resume.
+ * Returns 0 for success, errors for failure.
+ */
+static int cik_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih = &adev->irq.ih;
+ int rb_bufsz;
+ u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
+
+ /* disable irqs */
+ cik_ih_disable_interrupts(adev);
+
+ /* setup interrupt control */
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl &= ~INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK;
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl &= ~INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK;
+ WREG32(mmINTERRUPT_CNTL, interrupt_cntl);
+
+ WREG32(mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8);
+ rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
+
+ ih_rb_cntl = (IH_RB_CNTL__WPTR_OVERFLOW_ENABLE_MASK |
+ IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK |
+ (rb_bufsz << 1));
+
+ ih_rb_cntl |= IH_RB_CNTL__WPTR_WRITEBACK_ENABLE_MASK;
+
+ /* set the writeback address whether it's enabled or not */
+ WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
+ WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
+
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+
+ /* Default settings for IH_CNTL (disabled at first) */
+ ih_cntl = (0x10 << IH_CNTL__MC_WRREQ_CREDIT__SHIFT) |
+ (0x10 << IH_CNTL__MC_WR_CLEAN_CNT__SHIFT) |
+ (0 << IH_CNTL__MC_VMID__SHIFT);
+ /* IH_CNTL__RPTR_REARM_MASK only works if msi's are enabled */
+ if (adev->irq.msi_enabled)
+ ih_cntl |= IH_CNTL__RPTR_REARM_MASK;
+ WREG32(mmIH_CNTL, ih_cntl);
+
+ pci_set_master(adev->pdev);
+
+ /* enable irqs */
+ cik_ih_enable_interrupts(adev);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * cik_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (CIK).
+ */
+static void cik_ih_irq_disable(struct amdgpu_device *adev)
+{
+ cik_ih_disable_interrupts(adev);
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * cik_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (CIK). Also check for
+ * ring buffer overflow and deal with it.
+ * Used by the IH processing code.
+ * Returns the value of the wptr.
+ */
+static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
+ if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
+ wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
+ /* When a ring buffer overflow happens, start parsing interrupts
+ * from the last vector that was not overwritten (wptr + 16).
+ * Hopefully this allows us to catch up.
+ */
+ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+ ih->rptr = (wptr + 16) & ih->ptr_mask;
+ tmp = RREG32(mmIH_RB_CNTL);
+ tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
+ WREG32(mmIH_RB_CNTL, tmp);
+ }
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/* CIK IV Ring
+ * Each IV ring entry is 128 bits:
+ * [7:0] - interrupt source id
+ * [31:8] - reserved
+ * [59:32] - interrupt source data
+ * [63:60] - reserved
+ * [71:64] - RINGID
+ * CP:
+ * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
+ * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
+ * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
+ * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
+ * PIPE_ID - ME0 0=3D
+ * - ME1&2 compute dispatcher (4 pipes each)
+ * SDMA:
+ * INSTANCE_ID [1:0], QUEUE_ID[1:0]
+ * INSTANCE_ID - 0 = sdma0, 1 = sdma1
+ * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
+ * [79:72] - VMID
+ * [95:80] - PASID
+ * [127:96] - reserved
+ */
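+
+/*
+ * Worked example of the layout above, using hypothetical values and the
+ * exact bit slicing that cik_ih_decode_iv() performs below: for an entry
+ * whose third dword is dw[2] = 0x00120345,
+ *   ring_id = dw[2] & 0xff           = 0x45
+ *   vmid    = (dw[2] >> 8) & 0xff    = 0x03
+ *   pasid   = (dw[2] >> 16) & 0xffff = 0x0012
+ * and src_id comes from the low byte of dw[0].
+ */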
+
+/**
+ * cik_ih_decode_iv - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to read the vector from
+ * @entry: decoded IV entry to fill in
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advances the position.
+ */
+static void cik_ih_decode_iv(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[4];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ entry->src_id = dw[0] & 0xff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
+ entry->ring_id = dw[2] & 0xff;
+ entry->vmid = (dw[2] >> 8) & 0xff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 16;
+}
+
+/**
+ * cik_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void cik_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ WREG32(mmIH_RB_RPTR, ih->rptr);
+}
+
+static int cik_ih_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ ret = amdgpu_irq_add_domain(adev);
+ if (ret)
+ return ret;
+
+ cik_ih_set_interrupt_funcs(adev);
+
+ return 0;
+}
+
+static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
+ if (r)
+ return r;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int cik_ih_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_irq_fini_sw(adev);
+ amdgpu_irq_remove_domain(adev);
+
+ return 0;
+}
+
+static int cik_ih_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return cik_ih_irq_init(adev);
+}
+
+static int cik_ih_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ cik_ih_irq_disable(ip_block->adev);
+
+ return 0;
+}
+
+static int cik_ih_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return cik_ih_hw_fini(ip_block);
+}
+
+static int cik_ih_resume(struct amdgpu_ip_block *ip_block)
+{
+ return cik_ih_hw_init(ip_block);
+}
+
+static bool cik_ih_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ return false;
+
+ return true;
+}
+
+static int cik_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32(mmSRBM_STATUS) & SRBM_STATUS__IH_BUSY_MASK;
+ if (!tmp)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ u32 srbm_soft_reset = 0;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs cik_ih_ip_funcs = {
+ .name = "cik_ih",
+ .early_init = cik_ih_early_init,
+ .sw_init = cik_ih_sw_init,
+ .sw_fini = cik_ih_sw_fini,
+ .hw_init = cik_ih_hw_init,
+ .hw_fini = cik_ih_hw_fini,
+ .suspend = cik_ih_suspend,
+ .resume = cik_ih_resume,
+ .is_idle = cik_ih_is_idle,
+ .wait_for_idle = cik_ih_wait_for_idle,
+ .soft_reset = cik_ih_soft_reset,
+ .set_clockgating_state = cik_ih_set_clockgating_state,
+ .set_powergating_state = cik_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs cik_ih_funcs = {
+ .get_wptr = cik_ih_get_wptr,
+ .decode_iv = cik_ih_decode_iv,
+ .set_rptr = cik_ih_set_rptr
+};
+
+static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &cik_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version cik_ih_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &cik_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.h b/drivers/gpu/drm/amd/amdgpu/cik_ih.h
new file mode 100644
index 000000000000..1d9ddee2868e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __CIK_IH_H__
+#define __CIK_IH_H__
+
+extern const struct amdgpu_ip_block_version cik_ih_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
new file mode 100644
index 000000000000..9e8715b4739d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -0,0 +1,1367 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+#include "cikd.h"
+#include "cik.h"
+
+#include "bif/bif_4_1_d.h"
+#include "bif/bif_4_1_sh_mask.h"
+
+#include "gca/gfx_7_2_d.h"
+#include "gca/gfx_7_2_enum.h"
+#include "gca/gfx_7_2_sh_mask.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
+{
+ SDMA0_REGISTER_OFFSET,
+ SDMA1_REGISTER_OFFSET
+};
+
+static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
+static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
+static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
+static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block);
+
+u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
+
+MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_sdma.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/kabini_sdma.bin");
+MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
+MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
+
+static void cik_sdma_free_microcode(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+}
+
+/*
+ * sDMA - System DMA
+ * Starting with CIK, the GPU has new asynchronous
+ * DMA engines. These engines are used for compute
+ * and gfx. There are two DMA engines (SDMA0, SDMA1)
+ * and each one supports 1 ring buffer used for gfx
+ * and 2 queues used for compute.
+ *
+ * The programming model is very similar to the CP
+ * (ring buffer, IBs, etc.), but sDMA has its own
+ * packet format that is different from the PM4 format
+ * used by the CP. sDMA supports copying data, writing
+ * embedded data, solid fills, and a number of other
+ * things. It also has support for tiling/detiling of
+ * buffers.
+ */
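+
+/*
+ * Simplified view of how work is submitted, in terms of the helpers in this
+ * file: the driver pads the ring so that the 4-dword INDIRECT_BUFFER packet
+ * ends on an 8-dword boundary and points it at an IB full of sDMA packets
+ * (cik_sdma_ring_emit_ib()), then signals completion with a FENCE packet
+ * followed by a TRAP packet to raise an interrupt (cik_sdma_ring_emit_fence()).
+ */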
+
+/**
+ * cik_sdma_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+static int cik_sdma_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ int err = 0, i;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ chip_name = "bonaire";
+ break;
+ case CHIP_HAWAII:
+ chip_name = "hawaii";
+ break;
+ case CHIP_KAVERI:
+ chip_name = "kaveri";
+ break;
+ case CHIP_KABINI:
+ chip_name = "kabini";
+ break;
+ case CHIP_MULLINS:
+ chip_name = "mullins";
+ break;
+ default: BUG();
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (i == 0)
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sdma1.bin", chip_name);
+ if (err)
+ goto out;
+ }
+out:
+ if (err) {
+ pr_err("cik_sdma: Failed to load firmware \"%s_sdma%s.bin\"\n",
+ chip_name, i == 0 ? "" : "1");
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ucode_release(&adev->sdma.instance[i].fw);
+ }
+ return err;
+}
+
+/**
+ * cik_sdma_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (CIK+).
+ */
+static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u32 rptr;
+
+ rptr = *ring->rptr_cpu_addr;
+
+ return (rptr & 0x3fffc) >> 2;
+}
+
+/**
+ * cik_sdma_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (CIK+).
+ */
+static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) & 0x3fffc) >> 2;
+}
+
+/**
+ * cik_sdma_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (CIK+).
+ */
+static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
+ (ring->wptr << 2) & 0x3fffc);
+}
+
+static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
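+ /* When burst NOPs are supported, the first dword is a NOP packet whose
+ * COUNT field marks the following (count - 1) dwords as part of the same
+ * NOP; either way, exactly count dwords are written to the ring.
+ */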
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_NOP_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @job: job to retrieve the vmid from
+ * @ib: IB object to schedule
+ * @flags: unused
+ *
+ * Schedule an IB in the DMA ring (CIK).
+ */
+static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 extra_bits = vmid & 0xf;
+
+ /* IB packet must end on an 8 DW boundary */
+ cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
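+ /* The padding above leaves wptr at 4 (mod 8), so the 4-dword
+ * INDIRECT_BUFFER packet written below ends on an 8-dword boundary;
+ * e.g. if wptr % 8 == 6, (4 - 6) & 7 = 6 NOPs are emitted.
+ */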
+
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
+ amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
+ amdgpu_ring_write(ring, ib->length_dw);
+
+}
+
+/**
+ * cik_sdma_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
+ SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
+ u32 ref_and_mask;
+
+ if (ring->me == 0)
+ ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA0_MASK;
+ else
+ ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA1_MASK;
+
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
+ amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
+}
+
+/**
+ * cik_sdma_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (CIK).
+ */
+static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
+}
+
+/**
+ * cik_sdma_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers (CIK).
+ */
+static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
+{
+ u32 rb_cntl;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
+ }
+}
+
+/**
+ * cik_sdma_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues (CIK).
+ */
+static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * cik_ctx_switch_enable - enable/disable the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch (CIK).
+ */
+static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
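+ /* Encode the quantum as roughly value * 2^unit: halve the value
+ * (rounding up) and bump the unit until the value fits in the VALUE
+ * field; if the unit overflows too, clamp both and warn once.
+ */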
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
+ if (enable) {
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, 1);
+ if (amdgpu_sdma_phase_quantum) {
+ WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+ phase_quantum);
+ }
+ } else {
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, 0);
+ }
+
+ WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl);
+ }
+}
+
+/**
+ * cik_sdma_enable - halt or unhalt the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines (CIK).
+ */
+static void cik_sdma_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 me_cntl;
+ int i;
+
+ if (!enable) {
+ cik_sdma_gfx_stop(adev);
+ cik_sdma_rlc_stop(adev);
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ me_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
+ if (enable)
+ me_cntl &= ~SDMA0_F32_CNTL__HALT_MASK;
+ else
+ me_cntl |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], me_cntl);
+ }
+}
+
+/**
+ * cik_sdma_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ int i, j, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (j = 0; j < 16; j++) {
+ cik_srbm_select(adev, 0, 0, 0, j);
+ /* SDMA GFX */
+ WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
+ /* XXX SDMA RLC - todo */
+ }
+ cik_srbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+ adev->gfx.config.gb_addr_config & 0x70);
+
+ WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+ rb_cntl |= SDMA0_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK |
+ SDMA0_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK;
+#endif
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
+ ((ring->rptr_gpu_addr) & 0xFFFFFFFC));
+
+ rb_cntl |= SDMA0_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;
+
+ WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+ WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+
+ /* enable DMA RB */
+ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
+ rb_cntl | SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK);
+
+ ib_cntl = SDMA0_GFX_IB_CNTL__IB_ENABLE_MASK;
+#ifdef __BIG_ENDIAN
+ ib_cntl |= SDMA0_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK;
+#endif
+ /* enable DMA IBs */
+ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
+ }
+
+ cik_sdma_enable(adev, true);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
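
For reference, rb_bufsz above is just log2 of the ring size in dwords, shifted into the RB_CNTL size field at bit 1. A small standalone sketch of that encoding (order_base_2() is re-implemented here for illustration; the 4 KiB ring size is only an example):

#include <stdio.h>

static unsigned int order_base_2(unsigned int n)	/* ceil(log2(n)) for n >= 1 */
{
	unsigned int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned int ring_size = 4096;			/* bytes, example only */
	unsigned int rb_bufsz = order_base_2(ring_size / 4);

	printf("rb_cntl size bits: 0x%x\n", rb_bufsz << 1);
	return 0;
}
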
+
+/**
+ * cik_sdma_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_rlc_resume(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+ return 0;
+}
+
+/**
+ * cik_sdma_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int cik_sdma_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ cik_sdma_enable(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+ adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma.instance[i].feature_version >= 20)
+ adev->sdma.instance[i].burst_nop = true;
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
+ for (j = 0; j < fw_size; j++)
+ WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
+ WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * cik_sdma_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_start(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = cik_sdma_load_microcode(adev);
+ if (r)
+ return r;
+
+ /* halt the engine before programming */
+ cik_sdma_enable(adev, false);
+ /* enable sdma ring preemption */
+ cik_ctx_switch_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = cik_sdma_gfx_resume(adev);
+ if (r)
+ return r;
+ r = cik_sdma_rlc_resume(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * cik_sdma_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by using it to write a value to memory (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r)
+ goto error_free_wb;
+
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, 1); /* number of DWs to follow */
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+error_free_wb:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
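
The five dwords pushed above form one SDMA linear WRITE packet. A standalone sketch that builds the same packet with the SDMA_PACKET()/opcode definitions from cikd.h (the gpu_addr value is just an example):

#include <stdint.h>
#include <stdio.h>

#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					 (((sub_op) & 0xFF) << 8) |	\
					 (((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE		2
#define SDMA_WRITE_SUB_OPCODE_LINEAR	0

int main(void)
{
	uint64_t gpu_addr = 0x100000ull;	/* example writeback slot address */
	uint32_t pkt[5] = {
		SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0),
		(uint32_t)gpu_addr,		/* destination address, low 32 bits */
		(uint32_t)(gpu_addr >> 32),	/* destination address, high 32 bits */
		1,				/* number of dwords to follow */
		0xDEADBEEF,			/* payload the test later polls for */
	};
	int i;

	for (i = 0; i < 5; i++)
		printf("dw%d: 0x%08x\n", i, pkt[i]);
	return 0;
}
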
+
+/**
+ * cik_sdma_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
+ *
+ * Test a simple IB in the DMA ring (CIK).
+ * Returns 0 on success, error on failure.
+ */
+static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ u32 tmp = 0;
+ u64 gpu_addr;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r)
+ goto err0;
+
+ ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = 1;
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+
+err1:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+/**
+ * cik_sdma_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (CIK).
+ */
+static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+}
+
+/**
+ * cik_sdma_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @value: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ *
+ * Update PTEs by writing them manually using sDMA (CIK).
+ */
+static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * cik_sdma_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
+}
+
+/**
+ * cik_sdma_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: indirect buffer to fill with padding
+ *
+ * Pad the IB with NOPs so its size is a multiple of 8 dwords (CIK).
+ */
+static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (-ib->length_dw) & 7;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
+ SDMA_NOP_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+}
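
The (-length) & 7 expression above is the NOP count that rounds the IB up to the next multiple of 8 dwords. A quick sketch of that arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int len;

	for (len = 0; len < 10; len++) {
		unsigned int pad = (0u - len) & 7;	/* same as (-len) & 7 */

		printf("length_dw=%u -> pad=%u -> padded=%u\n", len, pad, len + pad);
	}
	return 0;
}
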
+
+/**
+ * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0,
+ SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+ SDMA_POLL_REG_MEM_EXTRA_FUNC(3) | /* equal */
+ SDMA_POLL_REG_MEM_EXTRA_M));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
+ amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
+}
+
+/**
+ * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: page directory base address
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (CIK).
+ */
+static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+ SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
+
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+ amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0); /* reference */
+ amdgpu_ring_write(ring, 0); /* mask */
+ amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
+}
+
+static void cik_sdma_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ WREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
+ WREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
+ } else {
+ orig = data = RREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
+ data |= 0xff000000;
+ if (data != orig)
+ WREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
+ data |= 0xff000000;
+ if (data != orig)
+ WREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
+ }
+}
+
+static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+ orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+ data |= 0x100;
+ if (orig != data)
+ WREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+ data |= 0x100;
+ if (orig != data)
+ WREG32(mmSDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+ } else {
+ orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+ data &= ~0x100;
+ if (orig != data)
+ WREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+ orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+ data &= ~0x100;
+ if (orig != data)
+ WREG32(mmSDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+ }
+}
+
+static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ adev->sdma.num_instances = SDMA_MAX_INSTANCE;
+
+ r = cik_sdma_init_microcode(adev);
+ if (r)
+ return r;
+
+ cik_sdma_set_ring_funcs(adev);
+ cik_sdma_set_irq_funcs(adev);
+ cik_sdma_set_buffer_funcs(adev);
+ cik_sdma_set_vm_pte_funcs(adev);
+
+ return 0;
+}
+
+static int cik_sdma_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_ring *ring;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
+ &adev->sdma.illegal_inst_irq);
+ if (r)
+ return r;
+
+ /* SDMA Privileged inst */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 247,
+ &adev->sdma.illegal_inst_irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
+ AMDGPU_SDMA_IRQ_INSTANCE1,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ cik_sdma_free_microcode(adev);
+ return 0;
+}
+
+static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return cik_sdma_start(adev);
+}
+
+static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cik_ctx_switch_enable(adev, false);
+ cik_sdma_enable(adev, false);
+
+ return 0;
+}
+
+static int cik_sdma_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return cik_sdma_hw_fini(ip_block);
+}
+
+static int cik_sdma_resume(struct amdgpu_ip_block *ip_block)
+{
+ cik_sdma_soft_reset(ip_block);
+
+ return cik_sdma_hw_init(ip_block);
+}
+
+static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp = RREG32(mmSRBM_STATUS2);
+
+ if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
+ SRBM_STATUS2__SDMA1_BUSY_MASK))
+ return false;
+
+ return true;
+}
+
+static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (cik_sdma_is_idle(ip_block))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 srbm_soft_reset = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp;
+
+ /* sdma0 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
+
+ /* sdma1 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int cik_sdma_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ switch (type) {
+ case AMDGPU_SDMA_IRQ_INSTANCE0:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
+ sdma_cntl &= ~SDMA0_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
+ sdma_cntl |= SDMA0_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ case AMDGPU_SDMA_IRQ_INSTANCE1:
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
+ sdma_cntl &= ~SDMA0_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
+ sdma_cntl |= SDMA0_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int cik_sdma_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 instance_id, queue_id;
+
+ instance_id = (entry->ring_id & 0x3) >> 0;
+ queue_id = (entry->ring_id & 0xc) >> 2;
+ DRM_DEBUG("IH: SDMA trap\n");
+ switch (instance_id) {
+ case 0:
+ switch (queue_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ }
+ break;
+ case 1:
+ switch (queue_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ }
+ break;
+ }
+
+ return 0;
+}
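
The handler above splits the low nibble of ring_id into an SDMA instance (bits 1:0) and a queue (bits 3:2). A trivial decode table, for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int ring_id;

	for (ring_id = 0; ring_id < 8; ring_id++)
		printf("ring_id=%u -> instance=%u queue=%u\n",
		       ring_id, ring_id & 0x3, (ring_id & 0xc) >> 2);
	return 0;
}
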
+
+static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 instance_id;
+
+ DRM_ERROR("Illegal instruction in SDMA command stream\n");
+ instance_id = (entry->ring_id & 0x3) >> 0;
+ drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
+ return 0;
+}
+
+static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ bool gate = false;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ cik_enable_sdma_mgcg(adev, gate);
+ cik_enable_sdma_mgls(adev, gate);
+
+ return 0;
+}
+
+static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs cik_sdma_ip_funcs = {
+ .name = "cik_sdma",
+ .early_init = cik_sdma_early_init,
+ .sw_init = cik_sdma_sw_init,
+ .sw_fini = cik_sdma_sw_fini,
+ .hw_init = cik_sdma_hw_init,
+ .hw_fini = cik_sdma_hw_fini,
+ .suspend = cik_sdma_suspend,
+ .resume = cik_sdma_resume,
+ .is_idle = cik_sdma_is_idle,
+ .wait_for_idle = cik_sdma_wait_for_idle,
+ .soft_reset = cik_sdma_soft_reset,
+ .set_clockgating_state = cik_sdma_set_clockgating_state,
+ .set_powergating_state = cik_sdma_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0),
+ .support_64bit_ptrs = false,
+ .get_rptr = cik_sdma_ring_get_rptr,
+ .get_wptr = cik_sdma_ring_get_wptr,
+ .set_wptr = cik_sdma_ring_set_wptr,
+ .emit_frame_size =
+ 6 + /* cik_sdma_ring_emit_hdp_flush */
+ 3 + /* hdp invalidate */
+ 6 + /* cik_sdma_ring_emit_pipeline_sync */
+ CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* cik_sdma_ring_emit_vm_flush */
+ 9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */
+ .emit_ib = cik_sdma_ring_emit_ib,
+ .emit_fence = cik_sdma_ring_emit_fence,
+ .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
+ .emit_vm_flush = cik_sdma_ring_emit_vm_flush,
+ .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
+ .test_ring = cik_sdma_ring_test_ring,
+ .test_ib = cik_sdma_ring_test_ib,
+ .insert_nop = cik_sdma_ring_insert_nop,
+ .pad_ib = cik_sdma_ring_pad_ib,
+ .emit_wreg = cik_sdma_ring_emit_wreg,
+};
+
+static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &cik_sdma_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs cik_sdma_trap_irq_funcs = {
+ .set = cik_sdma_set_trap_irq_state,
+ .process = cik_sdma_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs cik_sdma_illegal_inst_irq_funcs = {
+ .process = cik_sdma_process_illegal_inst_irq,
+};
+
+static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.trap_irq.funcs = &cik_sdma_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &cik_sdma_illegal_inst_irq_funcs;
+}
+
+/**
+ * cik_sdma_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to copy to
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ * @copy_flags: unused
+ *
+ * Copy GPU buffers using the DMA engine (CIK).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count,
+ uint32_t copy_flags)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = byte_count;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * cik_sdma_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ib: indirect buffer to fill
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine (CIK).
+ */
+static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count;
+}
+
+static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
+ .copy_max_bytes = 0x1fffff,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = cik_sdma_emit_copy_buffer,
+
+ .fill_max_bytes = 0x1fffff,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = cik_sdma_emit_fill_buffer,
+};
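
The 0x1fffff limit is 2^21 - 1 bytes per packet, so larger transfers presumably get split by the caller into a series of 7-dword COPY packets. A rough sketch of that split (illustrative arithmetic only, not amdgpu code):

#include <stdio.h>

int main(void)
{
	const unsigned long long copy_max_bytes = 0x1fffff;	/* 2^21 - 1 */
	const unsigned long long copy_num_dw = 7;
	unsigned long long size = 64ULL << 20;			/* 64 MiB example */
	unsigned long long packets = (size + copy_max_bytes - 1) / copy_max_bytes;

	printf("%llu bytes -> %llu COPY packets -> %llu IB dwords\n",
	       size, packets, packets * copy_num_dw);
	return 0;
}
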
+
+static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+}
+
+static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = cik_sdma_vm_copy_pte,
+
+ .write_pte = cik_sdma_vm_write_pte,
+ .set_pte_pde = cik_sdma_vm_set_pte_pde,
+};
+
+static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->vm_manager.vm_pte_scheds[i] =
+ &adev->sdma.instance[i].ring.sched;
+ }
+ adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
+}
+
+const struct amdgpu_ip_block_version cik_sdma_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &cik_sdma_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.h b/drivers/gpu/drm/amd/amdgpu/cik_sdma.h
new file mode 100644
index 000000000000..a4a8fe01410b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __CIK_SDMA_H__
+#define __CIK_SDMA_H__
+
+extern const struct amdgpu_ip_block_version cik_sdma_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
new file mode 100644
index 000000000000..8aca4f2734f2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -0,0 +1,614 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#ifndef CIK_H
+#define CIK_H
+
+#define MC_SEQ_MISC0__MT__MASK 0xf0000000
+#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
+#define MC_SEQ_MISC0__MT__DDR2 0x20000000
+#define MC_SEQ_MISC0__MT__GDDR3 0x30000000
+#define MC_SEQ_MISC0__MT__GDDR4 0x40000000
+#define MC_SEQ_MISC0__MT__GDDR5 0x50000000
+#define MC_SEQ_MISC0__MT__HBM 0x60000000
+#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
+
+#define CP_ME_TABLE_SIZE 96
+
+/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
+#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c)
+#define CRTC1_REGISTER_OFFSET (0x1e7c - 0x1b7c)
+#define CRTC2_REGISTER_OFFSET (0x417c - 0x1b7c)
+#define CRTC3_REGISTER_OFFSET (0x447c - 0x1b7c)
+#define CRTC4_REGISTER_OFFSET (0x477c - 0x1b7c)
+#define CRTC5_REGISTER_OFFSET (0x4a7c - 0x1b7c)
+
+/* hpd instance offsets */
+#define HPD0_REGISTER_OFFSET (0x1807 - 0x1807)
+#define HPD1_REGISTER_OFFSET (0x180a - 0x1807)
+#define HPD2_REGISTER_OFFSET (0x180d - 0x1807)
+#define HPD3_REGISTER_OFFSET (0x1810 - 0x1807)
+#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
+#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
+
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
+
+#define PIPEID(x) ((x) << 0)
+#define MEID(x) ((x) << 2)
+#define VMID(x) ((x) << 4)
+#define QUEUEID(x) ((x) << 8)
+
+#define mmCC_DRM_ID_STRAPS 0x1559
+#define CC_DRM_ID_STRAPS__ATI_REV_ID_MASK 0xf0000000
+
+#define mmCHUB_CONTROL 0x619
+#define BYPASS_VM (1 << 0)
+
+#define SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU (0 << 5)
+
+#define mmGRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02
+#define LUT_10BIT_BYPASS_EN (1 << 8)
+
+# define CURSOR_MONO 0
+# define CURSOR_24_1 1
+# define CURSOR_24_8_PRE_MULT 2
+# define CURSOR_24_8_UNPRE_MULT 3
+# define CURSOR_URGENT_ALWAYS 0
+# define CURSOR_URGENT_1_8 1
+# define CURSOR_URGENT_1_4 2
+# define CURSOR_URGENT_3_8 3
+# define CURSOR_URGENT_1_2 4
+
+# define GRPH_DEPTH_8BPP 0
+# define GRPH_DEPTH_16BPP 1
+# define GRPH_DEPTH_32BPP 2
+/* 8 BPP */
+# define GRPH_FORMAT_INDEXED 0
+/* 16 BPP */
+# define GRPH_FORMAT_ARGB1555 0
+# define GRPH_FORMAT_ARGB565 1
+# define GRPH_FORMAT_ARGB4444 2
+# define GRPH_FORMAT_AI88 3
+# define GRPH_FORMAT_MONO16 4
+# define GRPH_FORMAT_BGRA5551 5
+/* 32 BPP */
+# define GRPH_FORMAT_ARGB8888 0
+# define GRPH_FORMAT_ARGB2101010 1
+# define GRPH_FORMAT_32BPP_DIG 2
+# define GRPH_FORMAT_8B_ARGB2101010 3
+# define GRPH_FORMAT_BGRA1010102 4
+# define GRPH_FORMAT_8B_BGRA1010102 5
+# define GRPH_FORMAT_RGB111110 6
+# define GRPH_FORMAT_BGR101111 7
+# define ADDR_SURF_MACRO_TILE_ASPECT_1 0
+# define ADDR_SURF_MACRO_TILE_ASPECT_2 1
+# define ADDR_SURF_MACRO_TILE_ASPECT_4 2
+# define ADDR_SURF_MACRO_TILE_ASPECT_8 3
+# define GRPH_ARRAY_LINEAR_GENERAL 0
+# define GRPH_ARRAY_LINEAR_ALIGNED 1
+# define GRPH_ARRAY_1D_TILED_THIN1 2
+# define GRPH_ARRAY_2D_TILED_THIN1 4
+# define DISPLAY_MICRO_TILING 0
+# define THIN_MICRO_TILING 1
+# define DEPTH_MICRO_TILING 2
+# define ROTATED_MICRO_TILING 4
+# define GRPH_ENDIAN_NONE 0
+# define GRPH_ENDIAN_8IN16 1
+# define GRPH_ENDIAN_8IN32 2
+# define GRPH_ENDIAN_8IN64 3
+# define GRPH_RED_SEL_R 0
+# define GRPH_RED_SEL_G 1
+# define GRPH_RED_SEL_B 2
+# define GRPH_RED_SEL_A 3
+# define GRPH_GREEN_SEL_G 0
+# define GRPH_GREEN_SEL_B 1
+# define GRPH_GREEN_SEL_A 2
+# define GRPH_GREEN_SEL_R 3
+# define GRPH_BLUE_SEL_B 0
+# define GRPH_BLUE_SEL_A 1
+# define GRPH_BLUE_SEL_R 2
+# define GRPH_BLUE_SEL_G 3
+# define GRPH_ALPHA_SEL_A 0
+# define GRPH_ALPHA_SEL_R 1
+# define GRPH_ALPHA_SEL_G 2
+# define GRPH_ALPHA_SEL_B 3
+# define INPUT_GAMMA_USE_LUT 0
+# define INPUT_GAMMA_BYPASS 1
+# define INPUT_GAMMA_SRGB_24 2
+# define INPUT_GAMMA_XVYCC_222 3
+
+# define INPUT_CSC_BYPASS 0
+# define INPUT_CSC_PROG_COEFF 1
+# define INPUT_CSC_PROG_SHARED_MATRIXA 2
+
+# define OUTPUT_CSC_BYPASS 0
+# define OUTPUT_CSC_TV_RGB 1
+# define OUTPUT_CSC_YCBCR_601 2
+# define OUTPUT_CSC_YCBCR_709 3
+# define OUTPUT_CSC_PROG_COEFF 4
+# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5
+
+# define DEGAMMA_BYPASS 0
+# define DEGAMMA_SRGB_24 1
+# define DEGAMMA_XVYCC_222 2
+# define GAMUT_REMAP_BYPASS 0
+# define GAMUT_REMAP_PROG_COEFF 1
+# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2
+# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3
+
+# define REGAMMA_BYPASS 0
+# define REGAMMA_SRGB_24 1
+# define REGAMMA_XVYCC_222 2
+# define REGAMMA_PROG_A 3
+# define REGAMMA_PROG_B 4
+
+# define FMT_CLAMP_6BPC 0
+# define FMT_CLAMP_8BPC 1
+# define FMT_CLAMP_10BPC 2
+
+# define HDMI_24BIT_DEEP_COLOR 0
+# define HDMI_30BIT_DEEP_COLOR 1
+# define HDMI_36BIT_DEEP_COLOR 2
+# define HDMI_ACR_HW 0
+# define HDMI_ACR_32 1
+# define HDMI_ACR_44 2
+# define HDMI_ACR_48 3
+# define HDMI_ACR_X1 1
+# define HDMI_ACR_X2 2
+# define HDMI_ACR_X4 4
+# define AFMT_AVI_INFO_Y_RGB 0
+# define AFMT_AVI_INFO_Y_YCBCR422 1
+# define AFMT_AVI_INFO_Y_YCBCR444 2
+
+#define NO_AUTO 0
+#define ES_AUTO 1
+#define GS_AUTO 2
+#define ES_AND_GS_AUTO 3
+
+# define ARRAY_MODE(x) ((x) << 2)
+# define PIPE_CONFIG(x) ((x) << 6)
+# define TILE_SPLIT(x) ((x) << 11)
+# define MICRO_TILE_MODE_NEW(x) ((x) << 22)
+# define SAMPLE_SPLIT(x) ((x) << 25)
+# define BANK_WIDTH(x) ((x) << 0)
+# define BANK_HEIGHT(x) ((x) << 2)
+# define MACRO_TILE_ASPECT(x) ((x) << 4)
+# define NUM_BANKS(x) ((x) << 6)
+
+#define MSG_ENTER_RLC_SAFE_MODE 1
+#define MSG_EXIT_RLC_SAFE_MODE 0
+
+/*
+ * PM4
+ */
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
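
A round-trip sketch of the type-3 header packing above and the matching CP_PACKET*_GET_* decoders (PACKET_TYPE3 is given a 'u' suffix here only to keep the 30-bit shift unsigned in a userspace build):

#include <stdio.h>

#define PACKET_TYPE3			3u
#define PACKET3(op, n)			((PACKET_TYPE3 << 30) |		\
					 (((op) & 0xFF) << 8) |		\
					 ((n) & 0x3FFF) << 16)
#define CP_PACKET_GET_TYPE(h)		(((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h)		(((h) >> 16) & 0x3FFF)
#define CP_PACKET3_GET_OPCODE(h)	(((h) >> 8) & 0xFF)
#define PACKET3_WRITE_DATA		0x37

int main(void)
{
	unsigned int hdr = PACKET3(PACKET3_WRITE_DATA, 4);

	printf("hdr=0x%08x type=%u opcode=0x%02x count=%u\n",
	       hdr, CP_PACKET_GET_TYPE(hdr),
	       CP_PACKET3_GET_OPCODE(hdr), CP_PACKET_GET_COUNT(hdr));
	return 0;
}
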
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define CE_PARTITION_BASE 3
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_CONST 0x33
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_PREAMBLE 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ /* 0 - register
+ * 1 - memory (sync - via GRBM)
+ * 2 - gl2
+ * 3 - gds
+ * 4 - reserved
+ * 5 - memory (async - direct)
+ */
+#define WR_ONE_ADDR (1 << 16)
+#define WR_CONFIRM (1 << 20)
+#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ /* 0 - me
+ * 1 - pfp
+ * 2 - ce
+ */
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
+# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
+# define PACKET3_SEM_CLIENT_CODE ((x) << 24) /* 0 = CP, 1 = CB, 2 = DB */
+# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+#define PACKET3_COPY_DW 0x3B
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ /* 0 - reg
+ * 1 - mem
+ */
+#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+ /* 0 - me
+ * 1 - pfp
+ */
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define INDIRECT_BUFFER_TCL2_VOLATILE (1 << 22)
+#define INDIRECT_BUFFER_VALID (1 << 23)
+#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_SURFACE_SYNC 0x43
+# define PACKET3_DEST_BASE_0_ENA (1 << 0)
+# define PACKET3_DEST_BASE_1_ENA (1 << 1)
+# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
+# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
+# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
+# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
+# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
+# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
+# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
+# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
+# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
+# define PACKET3_TCL1_VOL_ACTION_ENA (1 << 15)
+# define PACKET3_TC_VOL_ACTION_ENA (1 << 16) /* L2 */
+# define PACKET3_TC_WB_ACTION_ENA (1 << 18) /* L2 */
+# define PACKET3_DEST_BASE_2_ENA (1 << 19)
+# define PACKET3_DEST_BASE_3_ENA (1 << 21)
+# define PACKET3_TCL1_ACTION_ENA (1 << 22)
+# define PACKET3_TC_ACTION_ENA (1 << 23) /* L2 */
+# define PACKET3_CB_ACTION_ENA (1 << 25)
+# define PACKET3_DB_ACTION_ENA (1 << 26)
+# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
+# define PACKET3_SH_KCACHE_VOL_ACTION_ENA (1 << 28)
+# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ * 5 - EOP events
+ * 6 - EOS events
+ */
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define EOP_TCL1_VOL_ACTION_EN (1 << 12)
+#define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */
+#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */
+#define EOP_TCL1_ACTION_EN (1 << 16)
+#define EOP_TC_ACTION_EN (1 << 17) /* L2 */
+#define EOP_TCL2_VOLATILE (1 << 24)
+#define EOP_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define EOP_EXEC (1 << 28) /* For Trailing Fence */
+#define DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit GPU counter value
+ * 4 - send 64bit sys counter value
+ */
+#define INT_SEL(x) ((x) << 24)
+ /* 0 - none
+ * 1 - interrupt only (DATA_SEL = 0)
+ * 2 - interrupt when data write is confirmed
+ */
+#define DST_SEL(x) ((x) << 16)
+ /* 0 - MC
+ * 1 - TC/L2
+ */
+#define PACKET3_EVENT_WRITE_EOS 0x48
+#define PACKET3_RELEASE_MEM 0x49
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_DMA_DATA 0x50
+/* 1. header
+ * 2. CONTROL
+ * 3. SRC_ADDR_LO or DATA [31:0]
+ * 4. SRC_ADDR_HI [31:0]
+ * 5. DST_ADDR_LO [31:0]
+ * 6. DST_ADDR_HI [7:0]
+ * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
+ */
+/* CONTROL */
+# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+# define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
+# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
+ /* 0 - DST_ADDR using DAS
+ * 1 - GDS
+ * 3 - DST_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+# define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
+# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR using SAS
+ * 1 - GDS
+ * 2 - DATA
+ * 3 - SRC_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_DMA_DATA_DIS_WC (1 << 21)
+# define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
+# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
+# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_ACQUIRE_MEM 0x58
+#define PACKET3_REWIND 0x59
+#define PACKET3_LOAD_UCONFIG_REG 0x5E
+#define PACKET3_LOAD_SH_REG 0x5F
+#define PACKET3_LOAD_CONFIG_REG 0x60
+#define PACKET3_LOAD_CONTEXT_REG 0x61
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_SET_QUEUE_REG 0x78
+#define PACKET3_SET_UCONFIG_REG 0x79
+#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
+#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SWITCH_BUFFER 0x8B
+
+/* SDMA - first instance at 0xd000, second at 0xd800 */
+#define SDMA0_REGISTER_OFFSET 0x0 /* not a register */
+#define SDMA1_REGISTER_OFFSET 0x200 /* not a register */
+#define SDMA_MAX_INSTANCE 2
+
+#define SDMA_PACKET(op, sub_op, e) ((((e) & 0xFFFF) << 16) | \
+ (((sub_op) & 0xFF) << 8) | \
+ (((op) & 0xFF) << 0))
+/* sDMA opcodes */
+#define SDMA_OPCODE_NOP 0
+# define SDMA_NOP_COUNT(x) (((x) & 0x3FFF) << 16)
+#define SDMA_OPCODE_COPY 1
+# define SDMA_COPY_SUB_OPCODE_LINEAR 0
+# define SDMA_COPY_SUB_OPCODE_TILED 1
+# define SDMA_COPY_SUB_OPCODE_SOA 3
+# define SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 4
+# define SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 5
+# define SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 6
+#define SDMA_OPCODE_WRITE 2
+# define SDMA_WRITE_SUB_OPCODE_LINEAR 0
+# define SDMA_WRITE_SUB_OPCODE_TILED 1
+#define SDMA_OPCODE_INDIRECT_BUFFER 4
+#define SDMA_OPCODE_FENCE 5
+#define SDMA_OPCODE_TRAP 6
+#define SDMA_OPCODE_SEMAPHORE 7
+# define SDMA_SEMAPHORE_EXTRA_O (1 << 13)
+ /* 0 - increment
+ * 1 - write 1
+ */
+# define SDMA_SEMAPHORE_EXTRA_S (1 << 14)
+ /* 0 - wait
+ * 1 - signal
+ */
+# define SDMA_SEMAPHORE_EXTRA_M (1 << 15)
+ /* mailbox */
+#define SDMA_OPCODE_POLL_REG_MEM 8
+# define SDMA_POLL_REG_MEM_EXTRA_OP(x) ((x) << 10)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+# define SDMA_POLL_REG_MEM_EXTRA_FUNC(x) ((x) << 12)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+# define SDMA_POLL_REG_MEM_EXTRA_M (1 << 15)
+ /* 0 = register
+ * 1 = memory
+ */
+#define SDMA_OPCODE_COND_EXEC 9
+#define SDMA_OPCODE_CONSTANT_FILL 11
+# define SDMA_CONSTANT_FILL_EXTRA_SIZE(x) ((x) << 14)
+ /* 0 = byte fill
+ * 2 = DW fill
+ */
+#define SDMA_OPCODE_GENERATE_PTE_PDE 12
+#define SDMA_OPCODE_TIMESTAMP 13
+# define SDMA_TIMESTAMP_SUB_OPCODE_SET_LOCAL 0
+# define SDMA_TIMESTAMP_SUB_OPCODE_GET_LOCAL 1
+# define SDMA_TIMESTAMP_SUB_OPCODE_GET_GLOBAL 2
+#define SDMA_OPCODE_SRBM_WRITE 14
+# define SDMA_SRBM_WRITE_EXTRA_BYTE_ENABLE(x) ((x) << 12)
+ /* byte mask */
+
+#define VCE_CMD_NO_OP 0x00000000
+#define VCE_CMD_END 0x00000001
+#define VCE_CMD_IB 0x00000002
+#define VCE_CMD_FENCE 0x00000003
+#define VCE_CMD_TRAP 0x00000004
+#define VCE_CMD_IB_AUTO 0x00000005
+#define VCE_CMD_SEMAPHORE 0x00000006
+
+/* if PTR32, these are the bases for scratch and lds */
+#define PRIVATE_BASE(x) ((x) << 0) /* scratch */
+#define SHARED_BASE(x) ((x) << 16) /* LDS */
+
+#define KFD_CIK_SDMA_QUEUE_OFFSET (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL)
+
+/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
+enum {
+ MTYPE_CACHED = 0,
+ MTYPE_NONCACHED = 3
+};
+
+/* mmPA_SC_RASTER_CONFIG mask */
+#define RB_MAP_PKR0(x) ((x) << 0)
+#define RB_MAP_PKR0_MASK (0x3 << 0)
+#define RB_MAP_PKR1(x) ((x) << 2)
+#define RB_MAP_PKR1_MASK (0x3 << 2)
+#define RB_XSEL2(x) ((x) << 4)
+#define RB_XSEL2_MASK (0x3 << 4)
+#define RB_XSEL (1 << 6)
+#define RB_YSEL (1 << 7)
+#define PKR_MAP(x) ((x) << 8)
+#define PKR_MAP_MASK (0x3 << 8)
+#define PKR_XSEL(x) ((x) << 10)
+#define PKR_XSEL_MASK (0x3 << 10)
+#define PKR_YSEL(x) ((x) << 12)
+#define PKR_YSEL_MASK (0x3 << 12)
+#define SC_MAP(x) ((x) << 16)
+#define SC_MAP_MASK (0x3 << 16)
+#define SC_XSEL(x) ((x) << 18)
+#define SC_XSEL_MASK (0x3 << 18)
+#define SC_YSEL(x) ((x) << 20)
+#define SC_YSEL_MASK (0x3 << 20)
+#define SE_MAP(x) ((x) << 24)
+#define SE_MAP_MASK (0x3 << 24)
+#define SE_XSEL(x) ((x) << 26)
+#define SE_XSEL_MASK (0x3 << 26)
+#define SE_YSEL(x) ((x) << 28)
+#define SE_YSEL_MASK (0x3 << 28)
+
+/* mmPA_SC_RASTER_CONFIG_1 mask */
+#define SE_PAIR_MAP(x) ((x) << 0)
+#define SE_PAIR_MAP_MASK (0x3 << 0)
+#define SE_PAIR_XSEL(x) ((x) << 2)
+#define SE_PAIR_XSEL_MASK (0x3 << 2)
+#define SE_PAIR_YSEL(x) ((x) << 4)
+#define SE_PAIR_YSEL_MASK (0x3 << 4)
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_ci.h b/drivers/gpu/drm/amd/amdgpu/clearstate_ci.h
new file mode 100644
index 000000000000..c3982f9475fb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_ci.h
@@ -0,0 +1,944 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int ci_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const unsigned int ci_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+ 0, // HOLE
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x00000000, // DB_STENCILREFMASK
+ 0x00000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int ci_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+ 0x00000000, // VGT_DMA_BASE_HI
+ 0x00000000, // VGT_DMA_BASE
+};
+static const unsigned int ci_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int ci_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int ci_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int ci_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0, // HOLE
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x0000000e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000010, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+};
+static const struct cs_extent_def ci_SECT_CONTEXT_defs[] =
+{
+ {ci_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {ci_SECT_CONTEXT_def_2, 0x0000a0d6, 274 },
+ {ci_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
+ {ci_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {ci_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {ci_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {ci_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def ci_cs_data[] = {
+ { ci_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_defs.h b/drivers/gpu/drm/amd/amdgpu/clearstate_defs.h
new file mode 100644
index 000000000000..3eda707d7388
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_defs.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef CLEARSTATE_DEFS_H
+#define CLEARSTATE_DEFS_H
+
+enum section_id {
+ SECT_NONE,
+ SECT_CONTEXT,
+ SECT_CLEAR,
+ SECT_CTRLCONST
+};
+
+struct cs_extent_def {
+ const unsigned int *extent;
+ const unsigned int reg_index;
+ const unsigned int reg_count;
+};
+
+struct cs_section_def {
+ const struct cs_extent_def *section;
+ const enum section_id id;
+};
+
+#endif
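(Reviewer note, not part of the patch: the two structures above are the glue between the generated tables and the GFX IP code. Each cs_extent_def names a run of reg_count consecutive context registers starting at dword offset reg_index, with the defaults held in extent; a cs_section_def groups extents under a section_id and the tables are terminated by zeroed entries. The sketch below shows roughly how such a table is walked when sizing a clear-state buffer. The function and table names and the two-dwords-per-extent packet overhead are illustrative assumptions, not the exact amdgpu helpers, which live in the per-generation gfx_v*_0.c files.)

/*
 * Illustrative sketch only: walk a cs_section_def table of the shape used
 * in this patch and count the dwords a clear-state buffer would need.
 * The "2 +" per-extent packet overhead is an assumption.
 */
#include <stdio.h>

struct cs_extent_def {                  /* mirrors clearstate_defs.h */
	const unsigned int *extent;     /* one default value per register */
	const unsigned int reg_index;   /* dword offset of extent[0] */
	const unsigned int reg_count;   /* number of consecutive registers */
};

enum section_id { SECT_NONE, SECT_CONTEXT, SECT_CLEAR, SECT_CTRLCONST };

struct cs_section_def {
	const struct cs_extent_def *section;
	const enum section_id id;
};

/* Tiny stand-in table with the same shape as ci_cs_data/gfx10_cs_data. */
static const unsigned int demo_regs[] = { 0x00000000, 0x3f800000 };
static const struct cs_extent_def demo_extents[] = {
	{ demo_regs, 0x0000a000, 2 },
	{ 0, 0, 0 },
};
static const struct cs_section_def demo_cs_data[] = {
	{ demo_extents, SECT_CONTEXT },
	{ 0, SECT_NONE },
};

/* Sum a (hypothetical) two-dword header plus one dword per register. */
static unsigned int csb_dword_count(const struct cs_section_def *cs)
{
	const struct cs_section_def *sect;
	const struct cs_extent_def *ext;
	unsigned int count = 0;

	for (sect = cs; sect->section; sect++)
		for (ext = sect->section; ext->extent; ext++)
			count += 2 + ext->reg_count;

	return count;
}

int main(void)
{
	printf("clear-state buffer: %u dwords\n", csb_dword_count(demo_cs_data));
	return 0;
}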
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h
new file mode 100644
index 000000000000..27a2ff0a07d2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h
@@ -0,0 +1,975 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int gfx10_SECT_CONTEXT_def_1[] = {
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0x00000000, // HOLE
+ 0x00000000, // DB_DEPTH_SIZE_XY
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0x00000000, // DB_DFSM_CONTROL
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0x00000000, // DB_Z_INFO2
+ 0x00000000, // DB_STENCIL_INFO2
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00150055, // DB_RMI_L2_CACHE_CONTROL
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+ 0x00000000, // PA_SC_RASTER_CONFIG
+ 0x00000000, // PA_SC_RASTER_CONFIG_1
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_2[] = {
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_RIGHT_VERT_GRID
+ 0x00000000, // PA_SC_LEFT_VERT_GRID
+ 0x00000000, // PA_SC_HORIZ_GRID
+ 0x00000000, // HOLE
+ 0x00000000, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x00550055, // CB_RMI_GL2_CACHE_CONTROL
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_DCC_CONTROL
+ 0x00000000, // CB_COVERAGE_OUT_CONTROL
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_IDX_FORMAT
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_3[] = {
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_4[] = {
+ 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x00000000, // PA_CL_OBJPRIM_ID_CNTL
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_5[] = {
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_6[] = {
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_7[] = {
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0x00000000, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0x00000000, // VGT_DISPATCH_DRAW_INDEX
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_8[] = {
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0, // HOLE
+ 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_DCC_CONTROL
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_DCC_CONTROL
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_DCC_CONTROL
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_DCC_CONTROL
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_DCC_CONTROL
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_DCC_CONTROL
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_DCC_CONTROL
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_DCC_CONTROL
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0x00000000, // CB_COLOR0_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR0_ATTRIB3
+ 0x00000000, // CB_COLOR1_ATTRIB3
+ 0x00000000, // CB_COLOR2_ATTRIB3
+ 0x00000000, // CB_COLOR3_ATTRIB3
+ 0x00000000, // CB_COLOR4_ATTRIB3
+ 0x00000000, // CB_COLOR5_ATTRIB3
+ 0x00000000, // CB_COLOR6_ATTRIB3
+ 0x00000000, // CB_COLOR7_ATTRIB3
+};
+static const struct cs_extent_def gfx10_SECT_CONTEXT_defs[] = {
+ {gfx10_SECT_CONTEXT_def_1, 0x0000a000, 215 },
+ {gfx10_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {gfx10_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx10_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
+ {gfx10_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx10_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx10_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
+ {gfx10_SECT_CONTEXT_def_8, 0x0000a2f5, 203 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx10_cs_data[] = {
+ { gfx10_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
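(Reviewer note, not part of the patch: for a table like gfx10_SECT_CONTEXT_defs, each reg_count must not exceed the length of the array it points to, or a get_csb_buffer-style walker would read past the end. The counts above do line up with the array sizes in this hunk; a hypothetical C11 self-check, assuming the arrays are in scope in the same translation unit, could assert that at compile time.)

/* Hypothetical compile-time check, assuming clearstate_gfx10.h is included. */
#define CS_LEN(a)	(sizeof(a) / sizeof((a)[0]))

_Static_assert(CS_LEN(gfx10_SECT_CONTEXT_def_1) == 215, "def_1");
_Static_assert(CS_LEN(gfx10_SECT_CONTEXT_def_2) == 272, "def_2");
_Static_assert(CS_LEN(gfx10_SECT_CONTEXT_def_3) == 4,   "def_3");
_Static_assert(CS_LEN(gfx10_SECT_CONTEXT_def_4) == 158, "def_4");
_Static_assert(CS_LEN(gfx10_SECT_CONTEXT_def_5) == 2,   "def_5");
_Static_assert(CS_LEN(gfx10_SECT_CONTEXT_def_6) == 1,   "def_6");
_Static_assert(CS_LEN(gfx10_SECT_CONTEXT_def_7) == 66,  "def_7");
_Static_assert(CS_LEN(gfx10_SECT_CONTEXT_def_8) == 203, "def_8");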
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
new file mode 100644
index 000000000000..a8b29d33c464
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
@@ -0,0 +1,997 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CLEARSTATE_GFX11_H_
+#define __CLEARSTATE_GFX11_H_
+
+static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_SIZE_XY
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_RESERVED_REG_2
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_RESERVED_REG_1
+ 0x00000000, // DB_RESERVED_REG_3
+ 0x00000000, // DB_SPI_VRS_CENTER_LOCATION
+ 0, // HOLE
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00150055, // DB_RMI_L2_CACHE_CONTROL
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+ 0x00000000, // PA_SC_RASTER_CONFIG
+ 0x00000000, // PA_SC_RASTER_CONFIG_1
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_PIPEID
+ 0x00000000, // CP_VMID
+ 0x00000000, // CONTEXT_RESERVED_REG0
+ 0x00000000, // CONTEXT_RESERVED_REG1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_FSR_EN
+ 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X
+ 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y
+ 0, // HOLE
+ 0x00000000, // PA_SC_VRS_OVERRIDE_CNTL
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT
+ 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY
+ 0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
+ 0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_VRS_RATE_BASE
+ 0x00000000, // PA_SC_VRS_RATE_BASE_EXT
+ 0x00000000, // PA_SC_VRS_RATE_SIZE_XY
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x00550055, // CB_RMI_GL2_CACHE_CONTROL
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_FDCC_CONTROL
+ 0x00000000, // CB_COVERAGE_OUT_CONTROL
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z
+ 0x00000000, // PA_RATE_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0x00000000, // SPI_BARYC_SSAA_CNTL
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_LO
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_IDX_FORMAT
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT_CONTROL
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // GE_MAX_OUTPUT_PER_SUBGROUP
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0x00000000, // PA_STEREO_CNTL
+ 0x00000000, // PA_STATE_STEREO_X
+ 0x00000000, // PA_CL_VRS_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0, // HOLE
+ 0x00000000, // VGT_REUSE_OFF
+ 0, // HOLE
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // GE_NGG_SUBGRP_CNTL
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0, // HOLE
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0x00000000, // PA_SC_BINNER_CNTL_2
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR0_ATTRIB3
+ 0x00000000, // CB_COLOR1_ATTRIB3
+ 0x00000000, // CB_COLOR2_ATTRIB3
+ 0x00000000, // CB_COLOR3_ATTRIB3
+ 0x00000000, // CB_COLOR4_ATTRIB3
+ 0x00000000, // CB_COLOR5_ATTRIB3
+ 0x00000000, // CB_COLOR6_ATTRIB3
+ 0x00000000, // CB_COLOR7_ATTRIB3
+};
+static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] =
+{
+ {gfx11_SECT_CONTEXT_def_1, 0x0000a000, 215 },
+ {gfx11_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {gfx11_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
+ {gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx11_cs_data[] = {
+ { gfx11_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
+
+#endif /* __CLEARSTATE_GFX11_H_ */
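
(Aside, not part of the patch: the cs_section_def/cs_extent_def tables above are what the GFX IP code walks when it sizes the clear-state indirect buffer — each extent contributes its reg_count payload dwords plus a small packet header. Below is a minimal sketch of that walk, assuming the usual clearstate_defs.h field layout (extent pointer, reg_index, reg_count) and a two-dword SET_CONTEXT_REG header per extent; the csb_size_dw() helper is hypothetical and illustrative, not the driver's own gfx_v*_0_get_csb_size(), which also accounts for preamble/end packets.)

	/*
	 * Sketch only: struct layouts are assumed to mirror clearstate_defs.h;
	 * real drivers add a few extra dwords for PREAMBLE/CLEAR_STATE packets.
	 */
	#include <stddef.h>

	struct cs_extent_def {
		const unsigned int *extent;	/* default register values          */
		unsigned int reg_index;		/* first register offset, e.g. 0xa000 */
		unsigned int reg_count;		/* number of consecutive registers   */
	};

	struct cs_section_def {
		const struct cs_extent_def *section;
		unsigned int id;		/* SECT_CONTEXT, SECT_NONE, ...      */
	};

	/* Count the dwords needed to replay every extent as one packet each. */
	static unsigned int csb_size_dw(const struct cs_section_def *cs)
	{
		const struct cs_section_def *sect;
		const struct cs_extent_def *ext;
		unsigned int count = 0;

		for (sect = cs; sect->section != NULL; ++sect)
			for (ext = sect->section; ext->extent != NULL; ++ext)
				count += 2 + ext->reg_count;	/* header + payload */

		return count;
	}

	/* e.g.: unsigned int dw = csb_size_dw(gfx11_cs_data); */
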
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h
new file mode 100644
index 000000000000..2f6c9d11d5ae
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx12.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CLEARSTATE_GFX12_H_
+#define __CLEARSTATE_GFX12_H_
+
+static const unsigned int gfx12_SECT_CONTEXT_def_1[] = {
+0x00000000, //mmSC_MEM_TEMPORAL
+0x00000000, //mmSC_MEM_SPEC_READ
+0x00000000, //mmPA_SC_VPORT_0_TL
+0x00000000, //mmPA_SC_VPORT_0_BR
+0x00000000, //mmPA_SC_VPORT_1_TL
+0x00000000, //mmPA_SC_VPORT_1_BR
+0x00000000, //mmPA_SC_VPORT_2_TL
+0x00000000, //mmPA_SC_VPORT_2_BR
+0x00000000, //mmPA_SC_VPORT_3_TL
+0x00000000, //mmPA_SC_VPORT_3_BR
+0x00000000, //mmPA_SC_VPORT_4_TL
+0x00000000, //mmPA_SC_VPORT_4_BR
+0x00000000, //mmPA_SC_VPORT_5_TL
+0x00000000, //mmPA_SC_VPORT_5_BR
+0x00000000, //mmPA_SC_VPORT_6_TL
+0x00000000, //mmPA_SC_VPORT_6_BR
+0x00000000, //mmPA_SC_VPORT_7_TL
+0x00000000, //mmPA_SC_VPORT_7_BR
+0x00000000, //mmPA_SC_VPORT_8_TL
+0x00000000, //mmPA_SC_VPORT_8_BR
+0x00000000, //mmPA_SC_VPORT_9_TL
+0x00000000, //mmPA_SC_VPORT_9_BR
+0x00000000, //mmPA_SC_VPORT_10_TL
+0x00000000, //mmPA_SC_VPORT_10_BR
+0x00000000, //mmPA_SC_VPORT_11_TL
+0x00000000, //mmPA_SC_VPORT_11_BR
+0x00000000, //mmPA_SC_VPORT_12_TL
+0x00000000, //mmPA_SC_VPORT_12_BR
+0x00000000, //mmPA_SC_VPORT_13_TL
+0x00000000, //mmPA_SC_VPORT_13_BR
+0x00000000, //mmPA_SC_VPORT_14_TL
+0x00000000, //mmPA_SC_VPORT_14_BR
+0x00000000, //mmPA_SC_VPORT_15_TL
+0x00000000, //mmPA_SC_VPORT_15_BR
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_2[] = {
+0x00000000, //mmPA_CL_PROG_NEAR_CLIP_Z
+0x00000000, //mmPA_RATE_CNTL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_3[] = {
+0x00000000, //mmCP_PERFMON_CNTX_CNTL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_4[] = {
+0x00000000, //mmCONTEXT_RESERVED_REG0
+0x00000000, //mmCONTEXT_RESERVED_REG1
+0x00000000, //mmPA_SC_CLIPRECT_0_EXT
+0x00000000, //mmPA_SC_CLIPRECT_1_EXT
+0x00000000, //mmPA_SC_CLIPRECT_2_EXT
+0x00000000, //mmPA_SC_CLIPRECT_3_EXT
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_5[] = {
+0x00000000, //mmPA_SC_HIZ_INFO
+0x00000000, //mmPA_SC_HIS_INFO
+0x00000000, //mmPA_SC_HIZ_BASE
+0x00000000, //mmPA_SC_HIZ_BASE_EXT
+0x00000000, //mmPA_SC_HIZ_SIZE_XY
+0x00000000, //mmPA_SC_HIS_BASE
+0x00000000, //mmPA_SC_HIS_BASE_EXT
+0x00000000, //mmPA_SC_HIS_SIZE_XY
+0x00000000, //mmPA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
+0x00000000, //mmPA_SC_BINNER_DYNAMIC_BATCH_LIMIT
+0x00000000, //mmPA_SC_HISZ_CONTROL
+};
+
+static const unsigned int gfx12_SECT_CONTEXT_def_6[] = {
+0x00000000, //mmCB_MEM0_INFO
+0x00000000, //mmCB_MEM1_INFO
+0x00000000, //mmCB_MEM2_INFO
+0x00000000, //mmCB_MEM3_INFO
+0x00000000, //mmCB_MEM4_INFO
+0x00000000, //mmCB_MEM5_INFO
+0x00000000, //mmCB_MEM6_INFO
+0x00000000, //mmCB_MEM7_INFO
+};
+
+static const struct cs_extent_def gfx12_SECT_CONTEXT_defs[] = {
+ {gfx12_SECT_CONTEXT_def_1, 0x0000a03e, 34 },
+ {gfx12_SECT_CONTEXT_def_2, 0x0000a0cc, 2 },
+ {gfx12_SECT_CONTEXT_def_3, 0x0000a0d8, 1 },
+ {gfx12_SECT_CONTEXT_def_4, 0x0000a0db, 6 },
+ {gfx12_SECT_CONTEXT_def_5, 0x0000a2e5, 11 },
+ {gfx12_SECT_CONTEXT_def_6, 0x0000a3c0, 8 },
+ { 0, 0, 0 }
+};
+
+static const struct cs_section_def gfx12_cs_data[] = {
+ { gfx12_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
+
+#endif /* __CLEARSTATE_GFX12_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
new file mode 100644
index 000000000000..9c85ca6358c1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h
@@ -0,0 +1,933 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int gfx9_SECT_CONTEXT_def_1[] = {
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_DFSM_CONTROL
+ 0, // HOLE
+ 0x00000000, // DB_Z_INFO2
+ 0x00000000, // DB_STENCIL_INFO2
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_2[] = {
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+ 0x00000000, // PA_SC_TILE_STEERING_OVERRIDE
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_RIGHT_VERT_GRID
+ 0x00000000, // PA_SC_LEFT_VERT_GRID
+ 0x00000000, // PA_SC_HORIZ_GRID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_DCC_CONTROL
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+ 0x00000000, // CB_MRT0_EPITCH
+ 0x00000000, // CB_MRT1_EPITCH
+ 0x00000000, // CB_MRT2_EPITCH
+ 0x00000000, // CB_MRT3_EPITCH
+ 0x00000000, // CB_MRT4_EPITCH
+ 0x00000000, // CB_MRT5_EPITCH
+ 0x00000000, // CB_MRT6_EPITCH
+ 0x00000000, // CB_MRT7_EPITCH
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_3[] = {
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_4[] = {
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x00000000, // PA_CL_OBJPRIM_ID_CNTL
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_5[] = {
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_6[] = {
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_7[] = {
+ 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0, // HOLE
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0x00000000, // VGT_DISPATCH_DRAW_INDEX
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+};
+static const unsigned int gfx9_SECT_CONTEXT_def_8[] = {
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00000000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0, // HOLE
+ 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_DCC_CONTROL
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_DCC_CONTROL
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_DCC_CONTROL
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_DCC_CONTROL
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_DCC_CONTROL
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_DCC_CONTROL
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_DCC_CONTROL
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_DCC_CONTROL
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+};
+static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = {
+ {gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 },
+ {gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx9_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {gfx9_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx9_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx9_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
+ {gfx9_SECT_CONTEXT_def_8, 0x0000a2f5, 155 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx9_cs_data[] = {
+ { gfx9_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
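
(Aside, not part of the patch: the reg_index values above — 0x0000a000, 0x0000a0d6, and so on — are offsets in the context-register space that SET_CONTEXT_REG packets address relative to a 0x0000a000 base, so replaying one extent amounts to emitting one packet carrying its default values, HOLE entries included. A sketch of that emission step follows; the PACKET3 encoding, opcode and base offset are written from memory of the amdgpu packet headers as assumptions rather than taken from this patch, and emit_extent() is a hypothetical helper.)

	/*
	 * Sketch only: encode "set reg_count context registers starting at
	 * reg_index" as a single PM4 type-3 packet and append it to buf.
	 */
	#include <stdint.h>

	#define PACKET3(op, n)			((3u << 30) | (((n) & 0x3fff) << 16) | \
						 (((op) & 0xff) << 8))
	#define PACKET3_SET_CONTEXT_REG		0x69
	#define PACKET3_SET_CONTEXT_REG_START	0x0000a000

	static uint32_t *emit_extent(uint32_t *buf, const uint32_t *values,
				     uint32_t reg_index, uint32_t reg_count)
	{
		uint32_t i;

		*buf++ = PACKET3(PACKET3_SET_CONTEXT_REG, reg_count);
		*buf++ = reg_index - PACKET3_SET_CONTEXT_REG_START;
		for (i = 0; i < reg_count; i++)
			*buf++ = values[i];	/* defaults from the tables above */

		return buf;
	}
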
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
new file mode 100644
index 000000000000..5fd96ddd7f0f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h
@@ -0,0 +1,933 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const u32 si_SECT_CONTEXT_def_1[] = {
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const u32 si_SECT_CONTEXT_def_2[] = {
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x00000000, // DB_STENCILREFMASK
+ 0x00000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_WAVE_MGMT_1
+ 0x00000000, // SPI_WAVE_MGMT_2
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const u32 si_SECT_CONTEXT_def_3[] = {
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+ 0x00000000, // VGT_DMA_BASE_HI
+ 0x00000000, // VGT_DMA_BASE
+};
+static const u32 si_SECT_CONTEXT_def_4[] = {
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0, // HOLE
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const u32 si_SECT_CONTEXT_def_5[] = {
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const u32 si_SECT_CONTEXT_def_6[] = {
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const u32 si_SECT_CONTEXT_def_7[] = {
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0, // HOLE
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x0000000e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000010, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+};
+static const struct cs_extent_def si_SECT_CONTEXT_defs[] = {
+ {si_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
+ {si_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {si_SECT_CONTEXT_def_5, 0x0000a2a1, 1 },
+ {si_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {si_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
+ { NULL, 0, 0 }
+};
+static const struct cs_section_def si_cs_data[] = {
+ { si_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { NULL, SECT_NONE }
+};
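The cs_extent_def entries above pair a table of register defaults with the register-space offset it starts at and the number of dwords it covers, and both the extent list and the cs_section_def list end with a NULL/SECT_NONE terminator. A minimal sketch of how such tables could be walked to size a clear-state buffer follows; it is illustrative only, and the field names (section, extent, reg_count) are assumed from the initializers above rather than taken from the patch itself.

/*
 * Illustrative sketch, not part of the patch: count the dwords a
 * clear-state buffer built from the NULL-terminated tables above would
 * need, assuming each extent is emitted as a two-dword packet header
 * followed by its register payload.
 */
static u32 count_clearstate_dwords(const struct cs_section_def *cs_data)
{
	const struct cs_section_def *sect;
	const struct cs_extent_def *ext;
	u32 count = 0;

	for (sect = cs_data; sect->section != NULL; ++sect)
		for (ext = sect->section; ext->extent != NULL; ++ext)
			count += 2 + ext->reg_count; /* header + register values */

	return count;
}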
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_vi.h b/drivers/gpu/drm/amd/amdgpu/clearstate_vi.h
new file mode 100644
index 000000000000..1aab9bef9349
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_vi.h
@@ -0,0 +1,944 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int vi_SECT_CONTEXT_def_1[] =
+{
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0, // HOLE
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const unsigned int vi_SECT_CONTEXT_def_2[] =
+{
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+ 0, // HOLE
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0, // HOLE
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_DCC_CONTROL
+ 0, // HOLE
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x00000000, // DB_STENCILREFMASK
+ 0x00000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int vi_SECT_CONTEXT_def_3[] =
+{
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+ 0x00000000, // VGT_DMA_BASE_HI
+ 0x00000000, // VGT_DMA_BASE
+};
+static const unsigned int vi_SECT_CONTEXT_def_4[] =
+{
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int vi_SECT_CONTEXT_def_5[] =
+{
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int vi_SECT_CONTEXT_def_6[] =
+{
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int vi_SECT_CONTEXT_def_7[] =
+{
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0x00000000, // VGT_DISPATCH_DRAW_INDEX
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_DCC_CONTROL
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_DCC_CONTROL
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_DCC_CONTROL
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_DCC_CONTROL
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_DCC_CONTROL
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_DCC_CONTROL
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_DCC_CONTROL
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_DCC_CONTROL
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+};
+static const struct cs_extent_def vi_SECT_CONTEXT_defs[] =
+{
+ {vi_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+ {vi_SECT_CONTEXT_def_2, 0x0000a0d6, 274 },
+ {vi_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
+ {vi_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+ {vi_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {vi_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {vi_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def vi_cs_data[] = {
+ { vi_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
new file mode 100644
index 000000000000..ed1e25661706
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "cyan_skillfish_ip_offset.h"
+
+int cyan_skillfish_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialize the blocks needed by the driver */
+ uint32_t i;
+
+ adev->gfx.xcc_mask = 1;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
new file mode 100644
index 000000000000..bc7a2e06ab5f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "vid.h"
+
+#include "oss/oss_3_0_1_d.h"
+#include "oss/oss_3_0_1_sh_mask.h"
+
+#include "bif/bif_5_1_d.h"
+#include "bif/bif_5_1_sh_mask.h"
+
+/*
+ * Interrupts
+ * Starting with r6xx, interrupts are handled via a ring buffer.
+ * Ring buffers are areas of GPU accessible memory that the GPU
+ * writes interrupt vectors into and the host reads vectors out of.
+ * There is a rptr (read pointer) that determines where the
+ * host is currently reading, and a wptr (write pointer)
+ * which determines where the GPU has written. When the
+ * pointers are equal, the ring is idle. When the GPU
+ * writes vectors to the ring buffer, it increments the
+ * wptr. When there is an interrupt, the host then starts
+ * fetching commands and processing them until the pointers are
+ * equal again at which point it updates the rptr.
+ */
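To make the pointer protocol above concrete, here is a small self-contained toy in plain C (separate from the driver code, and using index-based pointers rather than the byte-based, 16-byte-entry pointers the hardware uses): the producer bumps wptr, the consumer reads entries until rptr catches up, and equal pointers mean the ring is idle.

#include <stdio.h>

#define RING_ENTRIES 8	/* power of two so wrap-around is a simple mask */

struct toy_ih_ring {
	unsigned int entries[RING_ENTRIES];
	unsigned int rptr;	/* next entry the consumer will read */
	unsigned int wptr;	/* next slot the producer will write */
};

static void toy_push(struct toy_ih_ring *r, unsigned int vector)
{
	r->entries[r->wptr & (RING_ENTRIES - 1)] = vector;
	r->wptr++;		/* producer advances the write pointer */
}

static void toy_drain(struct toy_ih_ring *r)
{
	while (r->rptr != r->wptr) {	/* equal pointers => ring is idle */
		printf("vector 0x%x\n", r->entries[r->rptr & (RING_ENTRIES - 1)]);
		r->rptr++;		/* consumer advances the read pointer */
	}
}

int main(void)
{
	struct toy_ih_ring ring = { { 0 }, 0, 0 };

	toy_push(&ring, 0x10);
	toy_push(&ring, 0x20);
	toy_drain(&ring);	/* prints both vectors, then the ring is idle */
	return 0;
}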
+
+static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * cz_ih_enable_interrupts - Enable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable the interrupt ring buffer (VI).
+ */
+static void cz_ih_enable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, ENABLE_INTR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
+ WREG32(mmIH_CNTL, ih_cntl);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ adev->irq.ih.enabled = true;
+}
+
+/**
+ * cz_ih_disable_interrupts - Disable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the interrupt ring buffer (VI).
+ */
+static void cz_ih_disable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32(mmIH_RB_CNTL);
+ u32 ih_cntl = RREG32(mmIH_CNTL);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, ENABLE_INTR, 0);
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+ WREG32(mmIH_CNTL, ih_cntl);
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+ adev->irq.ih.enabled = false;
+ adev->irq.ih.rptr = 0;
+}
+
+/**
+ * cz_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, set up the IH
+ * ring buffer and enable it (VI).
+ * Called at device load and resume.
+ * Returns 0 for success, errors for failure.
+ */
+static int cz_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih = &adev->irq.ih;
+ u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
+ int rb_bufsz;
+
+ /* disable irqs */
+ cz_ih_disable_interrupts(adev);
+
+ /* setup interrupt control */
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32(mmINTERRUPT_CNTL, interrupt_cntl);
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer */
+ WREG32(mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8);
+
+ rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4);
+ ih_rb_cntl = REG_SET_FIELD(0, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register value is written to memory */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
+
+ /* set the writeback address whether it's enabled or not */
+ WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
+ WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
+
+ WREG32(mmIH_RB_CNTL, ih_rb_cntl);
+
+ /* set rptr, wptr to 0 */
+ WREG32(mmIH_RB_RPTR, 0);
+ WREG32(mmIH_RB_WPTR, 0);
+
+ /* Default settings for IH_CNTL (disabled at first) */
+ ih_cntl = RREG32(mmIH_CNTL);
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, MC_VMID, 0);
+
+ if (adev->irq.msi_enabled)
+ ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL, RPTR_REARM, 1);
+ WREG32(mmIH_CNTL, ih_cntl);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ cz_ih_enable_interrupts(adev);
+
+ if (adev->irq.ih_soft.ring_size)
+ adev->irq.ih_soft.enabled = true;
+
+ return 0;
+}
+
+/**
+ * cz_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (VI).
+ */
+static void cz_ih_irq_disable(struct amdgpu_device *adev)
+{
+ cz_ih_disable_interrupts(adev);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * cz_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (VI). Also check for
+ * ring buffer overflow and deal with it.
+ * Used by cz_irq_process (VI).
+ * Returns the value of the wptr.
+ */
+static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, tmp;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (ih == &adev->irq.ih_soft)
+ goto out;
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ /* Double check that the overflow wasn't already cleared. */
+ wptr = RREG32(mmIH_RB_WPTR);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happens, start parsing interrupts
+ * from the last vector that was not overwritten (wptr + 16, i.e.
+ * one 16-byte IV entry past the write pointer). Hopefully this
+ * allows us to catch up.
+ */
+ dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
+ ih->rptr = (wptr + 16) & ih->ptr_mask;
+ tmp = RREG32(mmIH_RB_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32(mmIH_RB_CNTL, tmp);
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ WREG32(mmIH_RB_CNTL, tmp);
+
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * cz_ih_decode_iv - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to decode
+ * @entry: IV entry to place decoded information into
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advances the position.
+ */
+static void cz_ih_decode_iv(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[4];
+
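+ /* Legacy IV ring entries are 4 dwords (16 bytes); the fields decoded
+ * below are src_id in dw0[7:0], src_data in dw1[27:0], ring_id in
+ * dw2[7:0], vmid in dw2[15:8] and pasid in dw2[31:16].
+ */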
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ entry->src_id = dw[0] & 0xff;
+ entry->src_data[0] = dw[1] & 0xfffffff;
+ entry->ring_id = dw[2] & 0xff;
+ entry->vmid = (dw[2] >> 8) & 0xff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 16;
+}
+
+/**
+ * cz_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void cz_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ WREG32(mmIH_RB_RPTR, ih->rptr);
+}
+
+static int cz_ih_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ ret = amdgpu_irq_add_domain(adev);
+ if (ret)
+ return ret;
+
+ cz_ih_set_interrupt_funcs(adev);
+
+ return 0;
+}
+
+static int cz_ih_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
+ if (r)
+ return r;
+
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int cz_ih_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_irq_fini_sw(adev);
+ amdgpu_irq_remove_domain(adev);
+
+ return 0;
+}
+
+static int cz_ih_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ r = cz_ih_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int cz_ih_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ cz_ih_irq_disable(ip_block->adev);
+
+ return 0;
+}
+
+static int cz_ih_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return cz_ih_hw_fini(ip_block);
+}
+
+static int cz_ih_resume(struct amdgpu_ip_block *ip_block)
+{
+ return cz_ih_hw_init(ip_block);
+}
+
+static bool cz_ih_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
+ return false;
+
+ return true;
+}
+
+static int cz_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read SRBM_STATUS */
+ tmp = RREG32(mmSRBM_STATUS);
+ if (!REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 srbm_soft_reset = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp = RREG32(mmSRBM_STATUS);
+
+ if (tmp & SRBM_STATUS__IH_BUSY_MASK)
+ srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
+ SOFT_RESET_IH, 1);
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+
+ return 0;
+}
+
+static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ // TODO
+ return 0;
+}
+
+static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ // TODO
+ return 0;
+}
+
+static const struct amd_ip_funcs cz_ih_ip_funcs = {
+ .name = "cz_ih",
+ .early_init = cz_ih_early_init,
+ .sw_init = cz_ih_sw_init,
+ .sw_fini = cz_ih_sw_fini,
+ .hw_init = cz_ih_hw_init,
+ .hw_fini = cz_ih_hw_fini,
+ .suspend = cz_ih_suspend,
+ .resume = cz_ih_resume,
+ .is_idle = cz_ih_is_idle,
+ .wait_for_idle = cz_ih_wait_for_idle,
+ .soft_reset = cz_ih_soft_reset,
+ .set_clockgating_state = cz_ih_set_clockgating_state,
+ .set_powergating_state = cz_ih_set_powergating_state,
+};
+
+static const struct amdgpu_ih_funcs cz_ih_funcs = {
+ .get_wptr = cz_ih_get_wptr,
+ .decode_iv = cz_ih_decode_iv,
+ .set_rptr = cz_ih_set_rptr
+};
+
+static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ adev->irq.ih_funcs = &cz_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version cz_ih_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 3,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &cz_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.h b/drivers/gpu/drm/amd/amdgpu/cz_ih.h
new file mode 100644
index 000000000000..14be7753221b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __CZ_IH_H__
+#define __CZ_IH_H__
+
+extern const struct amdgpu_ip_block_version cz_ih_ip_block;
+
+#endif /* __CZ_IH_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
new file mode 100644
index 000000000000..72ca6538b2e4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -0,0 +1,3679 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_edid.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_i2c.h"
+#include "vid.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_crtc.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "dce_v10_0.h"
+
+#include "dce/dce_10_0_d.h"
+#include "dce/dce_10_0_sh_mask.h"
+#include "dce/dce_10_0_enum.h"
+#include "oss/oss_3_0_d.h"
+#include "oss/oss_3_0_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+
+#include "ivsrcid/ivsrcid_vislands30.h"
+
+static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
+static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
+static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, int hpd);
+
+static const u32 crtc_offsets[] = {
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
+ CRTC6_REGISTER_OFFSET
+};
+
+static const u32 hpd_offsets[] = {
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
+};
+
+static const uint32_t dig_offsets[] = {
+ DIG0_REGISTER_OFFSET,
+ DIG1_REGISTER_OFFSET,
+ DIG2_REGISTER_OFFSET,
+ DIG3_REGISTER_OFFSET,
+ DIG4_REGISTER_OFFSET,
+ DIG5_REGISTER_OFFSET,
+ DIG6_REGISTER_OFFSET
+};
+
+static const struct {
+ uint32_t reg;
+ uint32_t vblank;
+ uint32_t vline;
+ uint32_t hpd;
+
+} interrupt_status_offsets[] = { {
+ .reg = mmDISP_INTERRUPT_STATUS,
+ .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
+} };
+
+static const u32 golden_settings_tonga_a11[] = {
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x1f311fff, 0x12300000,
+ mmHDMI_CONTROL, 0x31000111, 0x00000011,
+};
+
+static const u32 tonga_mgcg_cgcg_init[] = {
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+};
+
+static const u32 golden_settings_fiji_a10[] = {
+ mmDCI_CLK_CNTL, 0x00000080, 0x00000000,
+ mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070,
+ mmFBC_MISC, 0x1f311fff, 0x12300000,
+ mmHDMI_CONTROL, 0x31000111, 0x00000011,
+};
+
+static const u32 fiji_mgcg_cgcg_init[] = {
+ mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100,
+ mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000,
+};
+
+static void dce_v10_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ amdgpu_device_program_register_sequence(adev,
+ fiji_mgcg_cgcg_init,
+ ARRAY_SIZE(fiji_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_fiji_a10,
+ ARRAY_SIZE(golden_settings_fiji_a10));
+ break;
+ case CHIP_TONGA:
+ amdgpu_device_program_register_sequence(adev,
+ tonga_mgcg_cgcg_init,
+ ARRAY_SIZE(tonga_mgcg_cgcg_init));
+ amdgpu_device_program_register_sequence(adev,
+ golden_settings_tonga_a11,
+ ARRAY_SIZE(golden_settings_tonga_a11));
+ break;
+ default:
+ break;
+ }
+}
+
+static u32 dce_v10_0_audio_endpt_rreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+ return r;
+}
+
+static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+}
+
+static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc >= adev->mode_info.num_crtc)
+ return 0;
+ else
+ return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
+}
+
+static void dce_v10_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
+/**
+ * dce_v10_0_page_flip - pageflip callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ * @async: asynchronous flip
+ *
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
+ */
+static void dce_v10_0_page_flip(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async)
+{
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
+ u32 tmp;
+
+ /* flip at hsync for async, default is vsync */
+ tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
+ GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
+ /* update the primary scanout address */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(crtc_base));
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
+}
+
+static int dce_v10_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position)
+{
+ if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
+ return -EINVAL;
+
+ *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
+ *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
+
+ return 0;
+}
+
+/**
+ * dce_v10_0_hpd_sense - hpd sense callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (evergreen+).
+ * Returns true if connected, false if not connected.
+ */
+static bool dce_v10_0_hpd_sense(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ bool connected = false;
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return connected;
+
+ if (RREG32(mmDC_HPD_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD_INT_STATUS__DC_HPD_SENSE_MASK)
+ connected = true;
+
+ return connected;
+}
+
+/**
+ * dce_v10_0_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (evergreen+).
+ */
+static void dce_v10_0_hpd_set_polarity(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ u32 tmp;
+ bool connected = dce_v10_0_hpd_sense(adev, hpd);
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return;
+
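+ /* Presumably the HPD interrupt fires when the sensed level differs
+ * from the programmed polarity, so program the opposite of the
+ * current state to catch the next connect or disconnect.
+ */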
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ if (connected)
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_POLARITY, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+/**
+ * dce_v10_0_hpd_init - hpd setup callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup the hpd pins used by the card (evergreen+).
+ * Enable the pin, set the polarity, and enable the hpd interrupts.
+ */
+static void dce_v10_0_hpd_init(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+ /* Don't try to enable hpd on eDP or LVDS; this avoids breaking the
+ * aux dp channel on iMacs and helps (but does not completely fix)
+ * https://bugzilla.redhat.com/show_bug.cgi?id=726143
+ * It also avoids interrupt storms during dpms.
+ */
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ continue;
+ }
+
+ tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 1);
+ WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ tmp = RREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
+ DC_HPD_CONNECT_INT_DELAY,
+ AMDGPU_HPD_CONNECT_INT_DELAY_IN_MS);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_TOGGLE_FILT_CNTL,
+ DC_HPD_DISCONNECT_INT_DELAY,
+ AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS);
+ WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ dce_v10_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
+ dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+ amdgpu_irq_get(adev, &adev->hpd_irq,
+ amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+/**
+ * dce_v10_0_hpd_fini - hpd tear down callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the hpd pins used by the card (evergreen+).
+ * Disable the hpd interrupts.
+ */
+static void dce_v10_0_hpd_fini(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_CONTROL, DC_HPD_EN, 0);
+ WREG32(mmDC_HPD_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ amdgpu_irq_put(adev, &adev->hpd_irq,
+ amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+static u32 dce_v10_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
+{
+ return mmDC_GPIO_HPD_A;
+}
+
+static bool dce_v10_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ if (REG_GET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN)) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
+static void dce_v10_0_set_vga_render_state(struct amdgpu_device *adev,
+ bool render)
+{
+ u32 tmp;
+
+ /* Lock out access through the VGA aperture */
+ tmp = RREG32(mmVGA_HDP_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ WREG32(mmVGA_HDP_CONTROL, tmp);
+
+ /* disable VGA render */
+ tmp = RREG32(mmVGA_RENDER_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ WREG32(mmVGA_RENDER_CONTROL, tmp);
+}
+
+static int dce_v10_0_get_num_crtc(struct amdgpu_device *adev)
+{
+ int num_crtc = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ num_crtc = 6;
+ break;
+ default:
+ num_crtc = 0;
+ }
+ return num_crtc;
+}
+
+void dce_v10_0_disable_dce(struct amdgpu_device *adev)
+{
+ /* Disable VGA render and any enabled CRTCs, if the ASIC has a DCE engine */
+ if (amdgpu_atombios_has_dce_engine_info(adev)) {
+ u32 tmp;
+ int crtc_enabled, i;
+
+ dce_v10_0_set_vga_render_state(adev, false);
+
+ /* Disable the CRTCs */
+ for (i = 0; i < dce_v10_0_get_num_crtc(adev); i++) {
+ crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
+ CRTC_CONTROL, CRTC_MASTER_EN);
+ if (crtc_enabled) {
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
+ WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
+ }
+ }
+ }
+}
+
+static void dce_v10_0_program_fmt(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ int bpc = 0;
+ u32 tmp = 0;
+ enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ bpc = amdgpu_connector_get_monitor_bpc(connector);
+ dither = amdgpu_connector->dither;
+ }
+
+ /* LVDS/eDP FMT is set up by atom */
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
+ return;
+
+ /* not needed for analog */
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
+ return;
+
+ if (bpc == 0)
+ return;
+
+ switch (bpc) {
+ case 6:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 0);
+ }
+ break;
+ case 8:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 1);
+ }
+ break;
+ case 10:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE) {
+ /* XXX sort out optimal dither settings */
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_FRAME_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_HIGHPASS_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_RGB_RANDOM_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_SPATIAL_DITHER_DEPTH, 2);
+ } else {
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, 1);
+ tmp = REG_SET_FIELD(tmp, FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_DEPTH, 2);
+ }
+ break;
+ default:
+ /* not needed */
+ break;
+ }
+
+ WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+
+/* display watermark setup */
+/**
+ * dce_v10_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ *
+ * Set up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
+static u32 dce_v10_0_line_buffer_adjust(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ struct drm_display_mode *mode)
+{
+ u32 tmp, buffer_alloc, i, mem_cfg;
+ u32 pipe_offset = amdgpu_crtc->crtc_id;
+ /*
+ * Line Buffer Setup
+ * There are 6 line buffers, one for each display controller.
+ * There are 3 partitions per LB. Select the number of partitions
+ * to enable based on the display width. For display widths larger
+ * than 4096, you need to use 2 display controllers and combine
+ * them using the stereo blender.
+ */
+ if (amdgpu_crtc->base.enabled && mode) {
+ if (mode->crtc_hdisplay < 1920) {
+ mem_cfg = 1;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 2560) {
+ mem_cfg = 2;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 4096) {
+ mem_cfg = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ } else {
+ DRM_DEBUG_KMS("Mode too big for LB!\n");
+ mem_cfg = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ }
+ } else {
+ mem_cfg = 1;
+ buffer_alloc = 0;
+ }
+
+ tmp = RREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, LB_MEMORY_CTRL, LB_MEMORY_CONFIG, mem_cfg);
+ WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
+ tmp = REG_SET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATED, buffer_alloc);
+ WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset);
+ if (REG_GET_FIELD(tmp, PIPE0_DMIF_BUFFER_CONTROL, DMIF_BUFFERS_ALLOCATION_COMPLETED))
+ break;
+ udelay(1);
+ }
+
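+ /* e.g. a 1920 pixel wide mode falls into the crtc_hdisplay < 2560
+ * branch above (mem_cfg = 2), so the switch below reports a
+ * 2560 * 2 pixel line buffer
+ */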
+ if (amdgpu_crtc->base.enabled && mode) {
+ switch (mem_cfg) {
+ case 0:
+ default:
+ return 4096 * 2;
+ case 1:
+ return 1920 * 2;
+ case 2:
+ return 2560 * 2;
+ }
+ }
+
+ /* controller not enabled, so no lb used */
+ return 0;
+}
+
+/**
+ * cik_get_number_of_dram_channels - get the number of dram channels
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the number of video ram channels (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the number of dram channels
+ */
+static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(mmMC_SHARED_CHMAP);
+
+ switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
+ case 0:
+ default:
+ return 1;
+ case 1:
+ return 2;
+ case 2:
+ return 4;
+ case 3:
+ return 8;
+ case 4:
+ return 3;
+ case 5:
+ return 6;
+ case 6:
+ return 10;
+ case 7:
+ return 12;
+ case 8:
+ return 16;
+ }
+}
+
+struct dce10_wm_params {
+ u32 dram_channels; /* number of dram channels */
+ u32 yclk; /* bandwidth per dram data pin in kHz */
+ u32 sclk; /* engine clock in kHz */
+ u32 disp_clk; /* display clock in kHz */
+ u32 src_width; /* viewport width */
+ u32 active_time; /* active display time in ns */
+ u32 blank_time; /* blank time in ns */
+ bool interlaced; /* mode is interlaced */
+ fixed20_12 vsc; /* vertical scale ratio */
+ u32 num_heads; /* number of active crtcs */
+ u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+ u32 lb_size; /* line buffer allocated to pipe */
+ u32 vtaps; /* vertical scaler taps */
+};
+
+/**
+ * dce_v10_0_dram_bandwidth - get the dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the raw dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_dram_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate raw DRAM Bandwidth */
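+ /* Effectively: (yclk / 1000) * (dram_channels * 4) * 0.7 efficiency,
+ * in MB/s; e.g. an illustrative yclk of 1000000 kHz with 4 channels
+ * gives 1000 * 16 * 0.7 = 11200 MB/s.
+ */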
+ fixed20_12 dram_efficiency; /* 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ dram_efficiency.full = dfixed_const(7);
+ dram_efficiency.full = dfixed_div(dram_efficiency, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_dram_bandwidth_for_display - get the dram bandwidth for display
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dram bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth for display in MBytes/s
+ */
+static u32 dce_v10_0_dram_bandwidth_for_display(struct dce10_wm_params *wm)
+{
+ /* Calculate DRAM Bandwidth and the part allocated to display. */
+ fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
+ disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_data_return_bandwidth - get the data return bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the data return bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the data return bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_data_return_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the display Data return Bandwidth */
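+ /* Effectively: 32 bytes per sclk cycle * (sclk / 1000) * 0.8 efficiency, in MB/s */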
+ fixed20_12 return_efficiency; /* 0.8 */
+ fixed20_12 sclk, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ sclk.full = dfixed_const(wm->sclk);
+ sclk.full = dfixed_div(sclk, a);
+ a.full = dfixed_const(10);
+ return_efficiency.full = dfixed_const(8);
+ return_efficiency.full = dfixed_div(return_efficiency, a);
+ a.full = dfixed_const(32);
+ bandwidth.full = dfixed_mul(a, sclk);
+ bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_dmif_request_bandwidth - get the dmif bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dmif bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dmif bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_dmif_request_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the DMIF Request Bandwidth */
+ fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+ fixed20_12 disp_clk, bandwidth;
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(1000);
+ disp_clk.full = dfixed_const(wm->disp_clk);
+ disp_clk.full = dfixed_div(disp_clk, a);
+ a.full = dfixed_const(32);
+ b.full = dfixed_mul(a, disp_clk);
+
+ a.full = dfixed_const(10);
+ disp_clk_request_efficiency.full = dfixed_const(8);
+ disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+ bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_available_bandwidth - get the min available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the min available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the min available bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_available_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
+ u32 dram_bandwidth = dce_v10_0_dram_bandwidth(wm);
+ u32 data_return_bandwidth = dce_v10_0_data_return_bandwidth(wm);
+ u32 dmif_req_bandwidth = dce_v10_0_dmif_request_bandwidth(wm);
+
+ return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+/**
+ * dce_v10_0_average_bandwidth - get the average available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the average available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the average available bandwidth in MBytes/s
+ */
+static u32 dce_v10_0_average_bandwidth(struct dce10_wm_params *wm)
+{
+ /* Calculate the display mode Average Bandwidth
+ * DisplayMode should contain the source and destination dimensions,
+ * timing, etc.
+ */
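+ /* Effectively: src_width * bytes_per_pixel * vsc / line_time_in_us,
+ * in MB/s, which is what the fixed-point math below computes
+ */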
+ fixed20_12 bpp;
+ fixed20_12 line_time;
+ fixed20_12 src_width;
+ fixed20_12 bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+ line_time.full = dfixed_div(line_time, a);
+ bpp.full = dfixed_const(wm->bytes_per_pixel);
+ src_width.full = dfixed_const(wm->src_width);
+ bandwidth.full = dfixed_mul(src_width, bpp);
+ bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+ bandwidth.full = dfixed_div(bandwidth, line_time);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v10_0_latency_watermark - get the latency watermark
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the latency watermark (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the latency watermark in ns
+ */
+static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm)
+{
+ /* First calculate the latency in ns */
+ u32 mc_latency = 2000; /* 2000 ns. */
+ u32 available_bandwidth = dce_v10_0_available_bandwidth(wm);
+ u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+ u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
+ u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+ u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+ (wm->num_heads * cursor_line_pair_return_time);
+ u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+ u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+ u32 tmp, dmif_size = 12288;
+ fixed20_12 a, b, c;
+
+ if (wm->num_heads == 0)
+ return 0;
+
+ a.full = dfixed_const(2);
+ b.full = dfixed_const(1);
+ if ((wm->vsc.full > a.full) ||
+ ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+ (wm->vtaps >= 5) ||
+ ((wm->vsc.full >= a.full) && wm->interlaced))
+ max_src_lines_per_dst_line = 4;
+ else
+ max_src_lines_per_dst_line = 2;
+
+ a.full = dfixed_const(available_bandwidth);
+ b.full = dfixed_const(wm->num_heads);
+ a.full = dfixed_div(a, b);
+ tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+ tmp = min(dfixed_trunc(a), tmp);
+
+ lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
+
+ a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+ b.full = dfixed_const(1000);
+ c.full = dfixed_const(lb_fill_bw);
+ b.full = dfixed_div(c, b);
+ a.full = dfixed_div(a, b);
+ line_fill_time = dfixed_trunc(a);
+
+ if (line_fill_time < wm->active_time)
+ return latency;
+ else
+ return latency + (line_fill_time - wm->active_time);
+
+}
+
+/**
+ * dce_v10_0_average_bandwidth_vs_dram_bandwidth_for_display - check
+ * average and available dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v10_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce10_wm_params *wm)
+{
+ if (dce_v10_0_average_bandwidth(wm) <=
+ (dce_v10_0_dram_bandwidth_for_display(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v10_0_average_bandwidth_vs_available_bandwidth - check
+ * average and available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * available bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v10_0_average_bandwidth_vs_available_bandwidth(struct dce10_wm_params *wm)
+{
+ if (dce_v10_0_average_bandwidth(wm) <=
+ (dce_v10_0_available_bandwidth(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v10_0_check_latency_hiding - check latency hiding
+ *
+ * @wm: watermark calculation data
+ *
+ * Check latency hiding (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v10_0_check_latency_hiding(struct dce10_wm_params *wm)
+{
+ u32 lb_partitions = wm->lb_size / wm->src_width;
+ u32 line_time = wm->active_time + wm->blank_time;
+ u32 latency_tolerant_lines;
+ u32 latency_hiding;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1);
+ if (wm->vsc.full > a.full)
+ latency_tolerant_lines = 1;
+ else {
+ if (lb_partitions <= (wm->vtaps + 1))
+ latency_tolerant_lines = 1;
+ else
+ latency_tolerant_lines = 2;
+ }
+
+ latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+ if (dce_v10_0_latency_watermark(wm) <= latency_hiding)
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v10_0_program_watermarks - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @lb_size: line buffer size
+ * @num_heads: number of display controllers in use
+ *
+ * Calculate and program the display watermarks for the
+ * selected display controller (CIK).
+ */
+static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ u32 lb_size, u32 num_heads)
+{
+ struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
+ struct dce10_wm_params wm_low, wm_high;
+ u32 active_time;
+ u32 line_time = 0;
+ u32 latency_watermark_a = 0, latency_watermark_b = 0;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
+
+ if (amdgpu_crtc->base.enabled && num_heads && mode) {
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min_t(u32, line_time, 65535);
+
+ /* watermark for high clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_high.yclk =
+ amdgpu_dpm_get_mclk(adev, false) * 10;
+ wm_high.sclk =
+ amdgpu_dpm_get_sclk(adev, false) * 10;
+ } else {
+ wm_high.yclk = adev->pm.current_mclk * 10;
+ wm_high.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = active_time;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_high.interlaced = true;
+ wm_high.vsc = amdgpu_crtc->vsc;
+ wm_high.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_high.num_heads = num_heads;
+
+ /* set for high clocks */
+ latency_watermark_a = min_t(u32, dce_v10_0_latency_watermark(&wm_high), 65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v10_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce_v10_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce_v10_0_check_latency_hiding(&wm_high) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+
+ /* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_low.yclk =
+ amdgpu_dpm_get_mclk(adev, true) * 10;
+ wm_low.sclk =
+ amdgpu_dpm_get_sclk(adev, true) * 10;
+ } else {
+ wm_low.yclk = adev->pm.current_mclk * 10;
+ wm_low.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = active_time;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = amdgpu_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_low.num_heads = num_heads;
+
+ /* set for low clocks */
+ latency_watermark_b = min_t(u32, dce_v10_0_latency_watermark(&wm_low), 65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v10_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce_v10_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce_v10_0_check_latency_hiding(&wm_low) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
+ }
+
+ /* select wm A */
+ wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 1);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_a);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* select wm B */
+ tmp = REG_SET_FIELD(wm_mask, DPG_WATERMARK_MASK_CONTROL, URGENCY_WATERMARK_MASK, 2);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ tmp = RREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, latency_watermark_b);
+ tmp = REG_SET_FIELD(tmp, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, line_time);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ /* restore original selection */
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
+
+ /* save values for DPM */
+ amdgpu_crtc->line_time = line_time;
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
+}
+
+/**
+ * dce_v10_0_bandwidth_update - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calculate and program the display watermarks and line
+ * buffer allocation (CIK).
+ */
+static void dce_v10_0_bandwidth_update(struct amdgpu_device *adev)
+{
+ struct drm_display_mode *mode = NULL;
+ u32 num_heads = 0, lb_size;
+ int i;
+
+ amdgpu_display_update_priority(adev);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i]->base.enabled)
+ num_heads++;
+ }
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ mode = &adev->mode_info.crtcs[i]->base.mode;
+ lb_size = dce_v10_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode);
+ dce_v10_0_program_watermarks(adev, adev->mode_info.crtcs[i],
+ lb_size, num_heads);
+ }
+}
+
+static void dce_v10_0_audio_get_connected_pins(struct amdgpu_device *adev)
+{
+ int i;
+ u32 offset, tmp;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ offset = adev->mode_info.audio.pin[i].offset;
+ tmp = RREG32_AUDIO_ENDPT(offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+ if (((tmp &
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1)
+ adev->mode_info.audio.pin[i].connected = false;
+ else
+ adev->mode_info.audio.pin[i].connected = true;
+ }
+}
+
+static struct amdgpu_audio_pin *dce_v10_0_audio_get_pin(struct amdgpu_device *adev)
+{
+ int i;
+
+ dce_v10_0_audio_get_connected_pins(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ if (adev->mode_info.audio.pin[i].connected)
+ return &adev->mode_info.audio.pin[i];
+ }
+ DRM_ERROR("No connected audio pins found!\n");
+ return NULL;
+}
+
+static void dce_v10_0_afmt_audio_select_pin(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ tmp = RREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, dig->afmt->pin->id);
+ WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v10_0_audio_write_latency_fields(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 tmp;
+ int interlace = 0;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ interlace = 1;
+ if (connector->latency_present[interlace]) {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, connector->video_latency[interlace]);
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, connector->audio_latency[interlace]);
+ } else {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, 0);
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, 0);
+ }
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
+}
+
+static void dce_v10_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 tmp;
+ u8 *sadb = NULL;
+ int sad_count;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
+ if (sad_count < 0) {
+ DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
+ }
+
+ /* program the speaker allocation */
+ tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ DP_CONNECTION, 0);
+ /* set HDMI mode */
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ HDMI_CONNECTION, 1);
+ if (sad_count)
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, sadb[0]);
+ else
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, 5); /* stereo */
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+ kfree(sadb);
+}
+
+static void dce_v10_0_audio_write_sad_regs(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct cea_sad *sads;
+ int i, sad_count;
+
+ static const u16 eld_reg_to_type[][2] = {
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+ };
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
+ if (sad_count < 0)
+ DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
+ return;
+ BUG_ON(!sads);
+
+ for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+ u32 tmp = 0;
+ u8 stereo_freqs = 0;
+ int max_channels = -1;
+ int j;
+
+ for (j = 0; j < sad_count; j++) {
+ struct cea_sad *sad = &sads[j];
+
+ if (sad->format == eld_reg_to_type[i][1]) {
+ if (sad->channels > max_channels) {
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ MAX_CHANNELS, sad->channels);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ DESCRIPTOR_BYTE_2, sad->byte2);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ SUPPORTED_FREQUENCIES, sad->freq);
+ max_channels = sad->channels;
+ }
+
+ if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+ stereo_freqs |= sad->freq;
+ else
+ break;
+ }
+ }
+
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
+ SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+ }
+
+ kfree(sads);
+}
+
+static void dce_v10_0_audio_enable(struct amdgpu_device *adev,
+ struct amdgpu_audio_pin *pin,
+ bool enable)
+{
+ if (!pin)
+ return;
+
+ WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+ enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
+}
+
+static const u32 pin_offsets[] = {
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
+};
+
+static int dce_v10_0_audio_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return 0;
+
+ adev->mode_info.audio.enabled = true;
+
+ adev->mode_info.audio.num_pins = 7;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ adev->mode_info.audio.pin[i].channels = -1;
+ adev->mode_info.audio.pin[i].rate = -1;
+ adev->mode_info.audio.pin[i].bits_per_sample = -1;
+ adev->mode_info.audio.pin[i].status_bits = 0;
+ adev->mode_info.audio.pin[i].category_code = 0;
+ adev->mode_info.audio.pin[i].connected = false;
+ adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+ adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
+ dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ return 0;
+}
+
+static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_audio)
+ return;
+
+ if (!adev->mode_info.audio.enabled)
+ return;
+
+ adev->mode_info.audio.enabled = false;
+}
+
+/*
+ * update the N and CTS parameters for a given pixel clock rate
+ */
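+/* Background note: per the HDMI audio clock regeneration scheme the sink
+ * recovers the audio clock from 128 * fs = f_TMDS * N / CTS, so
+ * amdgpu_afmt_acr() is expected to return matched N/CTS pairs for
+ * 32/44.1/48 kHz at the given pixel clock.
+ */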
+static void dce_v10_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
+ WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
+ WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
+ WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
+ WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
+ WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
+ WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
+}
+
+/*
+ * write a packed HDMI AVI InfoFrame to the AFMT registers
+ */
+static void dce_v10_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
+ void *buffer, size_t size)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ uint8_t *frame = buffer + 3;
+ uint8_t *header = buffer;
+
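+ /* The packed buffer is a 4-byte header (type, version, length, checksum)
+ * followed by the 13-byte AVI payload; frame starts at the checksum so it
+ * is written out along with the payload, and the version byte (header[1])
+ * lands in the top byte of AFMT_AVI_INFO3.
+ */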
+ WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
+ frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24));
+ WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
+ frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24));
+ WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
+ frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24));
+ WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
+ frame[0xC] | (frame[0xD] << 8) | (header[1] << 24));
+}
+
+static void dce_v10_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ u32 dto_phase = 24 * 1000;
+ u32 dto_modulo = clock;
+ u32 tmp;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* XXX two dtos; generally use dto0 for hdmi */
+ /* Express [24MHz / target pixel clock] as an exact rational
+ * number (ratio of two integers): DCCG_AUDIO_DTOx_PHASE is the
+ * numerator and DCCG_AUDIO_DTOx_MODULE is the denominator.
+ */
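+ /* e.g. a 1080p60 pixel clock of 148.5 MHz (clock == 148500 kHz)
+ * programs PHASE/MODULE = 24000/148500.
+ */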
+ tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL,
+ amdgpu_crtc->crtc_id);
+ WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
+ WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
+ WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
+}
+
+/*
+ * update the info frames with the data from the current display mode
+ */
+static void dce_v10_0_afmt_setmode(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+ struct hdmi_avi_infoframe frame;
+ ssize_t err;
+ u32 tmp;
+ int bpc = 8;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (!dig->afmt->enabled)
+ return;
+
+ /* hdmi deep color mode general control packets setup, if bpc > 8 */
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ /* disable audio prior to setting up hw */
+ dig->afmt->pin = dce_v10_0_audio_get_pin(adev);
+ dce_v10_0_audio_enable(adev, dig->afmt->pin, false);
+
+ dce_v10_0_audio_set_dto(encoder, mode->clock);
+
+ tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); /* send null packets when required */
+
+ WREG32(mmAFMT_AUDIO_CRC_CONTROL + dig->afmt->offset, 0x1000);
+
+ tmp = RREG32(mmHDMI_CONTROL + dig->afmt->offset);
+ switch (bpc) {
+ case 0:
+ case 6:
+ case 8:
+ case 16:
+ default:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+ DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
+ connector->name, bpc);
+ break;
+ case 10:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 1);
+ DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
+ connector->name);
+ break;
+ case 12:
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 2);
+ DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
+ connector->name);
+ break;
+ }
+ WREG32(mmHDMI_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); /* send null packets when required */
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); /* send general control packets */
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); /* send general control packets every frame */
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* enable audio info frames (frames won't be set until audio is enabled) */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+ WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ /* anything other than 0 */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+ WREG32(mmHDMI_GC + dig->afmt->offset, 0); /* unset HDMI_GC_AVMUTE */
+
+ tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* set the default audio delay */
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
+ /* should be sufficient for all audio modes and small enough for all hblanks */
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
+ WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* allow 60958 channel status fields to be updated */
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
+ if (bpc > 8)
+ /* clear SW CTS value */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 0);
+ else
+ /* select SW CTS value */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, 1);
+ /* allow hw to send ACR packets when required */
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ dce_v10_0_afmt_update_ACR(encoder, mode->clock);
+
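+ /* IEC 60958 channel status: number the audio channels 1-8 (left, right,
+ * then the remaining six) so multichannel layouts map correctly.
+ */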
+ tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
+ WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+ WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+ WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
+
+ dce_v10_0_audio_write_speaker_allocation(encoder);
+
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset,
+ (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
+
+ dce_v10_0_afmt_audio_select_pin(encoder);
+ dce_v10_0_audio_write_sad_regs(encoder);
+ dce_v10_0_audio_write_latency_fields(encoder, mode);
+
+ err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
+ if (err < 0) {
+ DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+ if (err < 0) {
+ DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ dce_v10_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ /* enable AVI info frames */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
+ /* required for audio info values to be updated */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ /* send audio packets */
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ WREG32(mmAFMT_RAMP_CONTROL0 + dig->afmt->offset, 0x00FFFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL1 + dig->afmt->offset, 0x007FFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL2 + dig->afmt->offset, 0x00000001);
+ WREG32(mmAFMT_RAMP_CONTROL3 + dig->afmt->offset, 0x00000001);
+
+ /* enable audio after setting up hw */
+ dce_v10_0_audio_enable(adev, dig->afmt->pin, true);
+}
+
+static void dce_v10_0_afmt_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (enable && dig->afmt->enabled)
+ return;
+ if (!enable && !dig->afmt->enabled)
+ return;
+
+ if (!enable && dig->afmt->pin) {
+ dce_v10_0_audio_enable(adev, dig->afmt->pin, false);
+ dig->afmt->pin = NULL;
+ }
+
+ dig->afmt->enabled = enable;
+
+ DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
+ enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
+}
+
+static int dce_v10_0_afmt_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++)
+ adev->mode_info.afmt[i] = NULL;
+
+ /* DCE10 has audio blocks tied to DIG encoders */
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
+ if (adev->mode_info.afmt[i]) {
+ adev->mode_info.afmt[i]->offset = dig_offsets[i];
+ adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ kfree(adev->mode_info.afmt[i]);
+ adev->mode_info.afmt[i] = NULL;
+ }
+}
+
+static const u32 vga_control_regs[6] = {
+ mmD1VGA_CONTROL,
+ mmD2VGA_CONTROL,
+ mmD3VGA_CONTROL,
+ mmD4VGA_CONTROL,
+ mmD5VGA_CONTROL,
+ mmD6VGA_CONTROL,
+};
+
+static void dce_v10_0_vga_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 vga_control;
+
+ vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
+ if (enable)
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1);
+ else
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control);
+}
+
+static void dce_v10_0_grph_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (enable)
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1);
+ else
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0);
+}
+
+static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, int atomic)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_framebuffer *target_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *abo;
+ uint64_t fb_location, tiling_flags;
+ uint32_t fb_format, fb_pitch_pixels;
+ u32 fb_swap = REG_SET_FIELD(0, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, ENDIAN_NONE);
+ u32 pipe_config;
+ u32 tmp, viewport_w, viewport_h;
+ int r;
+ bool bypass_lut = false;
+
+ /* no fb bound */
+ if (!atomic && !crtc->primary->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ if (atomic)
+ target_fb = fb;
+ else
+ target_fb = crtc->primary->fb;
+
+ /* If atomic, assume fb object is pinned & idle & fenced and
+ * just update base pointers
+ */
+ obj = target_fb->obj[0];
+ abo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(abo, false);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(abo);
+ return -EINVAL;
+ }
+ }
+ fb_location = amdgpu_bo_gpu_offset(abo);
+
+ amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
+ amdgpu_bo_unreserve(abo);
+
+ pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+
+ switch (target_fb->format->format) {
+ case DRM_FORMAT_C8:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 0);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+ break;
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 2);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_BGRA5551:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 5);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_RGB565:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN16);
+#endif
+ break;
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ break;
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 1);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ /* Greater than 8 bpc fb needs to bypass the hw lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_BGRA1010102:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 4);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ /* Greater than 8 bpc fb needs to bypass the hw lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ break;
+ default:
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
+ return -EINVAL;
+ }
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
+ unsigned bankw, bankh, mtaspect, tile_split, num_banks;
+
+ bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+ num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_NUM_BANKS, num_banks);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
+ ARRAY_2D_TILED_THIN1);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_TILE_SPLIT,
+ tile_split);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_WIDTH, bankw);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_BANK_HEIGHT, bankh);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MACRO_TILE_ASPECT,
+ mtaspect);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_MICRO_TILE_MODE,
+ ADDR_SURF_MICRO_TILING_DISPLAY);
+ } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_ARRAY_MODE,
+ ARRAY_1D_TILED_THIN1);
+ }
+
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_PIPE_CONFIG,
+ pipe_config);
+
+ dce_v10_0_vga_enable(crtc, false);
+
+ /* Make sure surface address is updated at vertical blank rather than
+ * horizontal blank
+ */
+ tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
+ GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0);
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+ WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
+
+ /*
+ * The LUT only has 256 slots for indexing by an 8 bpc fb. Bypass the LUT
+ * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
+ * retain the full precision throughout the pipeline.
+ */
+ tmp = RREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset);
+ if (bypass_lut)
+ tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, GRPH_LUT_10BIT_BYPASS, GRPH_LUT_10BIT_BYPASS_EN, 0);
+ WREG32(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset, tmp);
+
+ if (bypass_lut)
+ DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
+
+ WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
+ WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
+
+ fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
+
+ dce_v10_0_grph_enable(crtc, true);
+
+ WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
+ target_fb->height);
+
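+ /* keep the viewport start aligned: x to a multiple of 4, y to an even line */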
+ x &= ~3;
+ y &= ~1;
+ WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
+ (x << 16) | y);
+ viewport_w = crtc->mode.hdisplay;
+ viewport_h = (crtc->mode.vdisplay + 1) & ~1;
+ WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
+ (viewport_w << 16) | viewport_h);
+
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
+
+ if (!atomic && fb && fb != crtc->primary->fb) {
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r != 0))
+ return r;
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+
+ /* Bytes per pixel may have changed */
+ dce_v10_0_bandwidth_update(adev);
+
+ return 0;
+}
+
+static void dce_v10_0_set_interleave(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ u32 tmp;
+
+ tmp = RREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset);
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, LB_DATA_FORMAT, INTERLEAVE_EN, 0);
+ WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static void dce_v10_0_crtc_load_lut(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u16 *r, *g, *b;
+ int i;
+ u32 tmp;
+
+ DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
+
+ tmp = RREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_GRPH_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, INPUT_CSC_CONTROL, INPUT_CSC_OVL_MODE, 0);
+ WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, PRESCALE_GRPH_CONTROL, GRPH_PRESCALE_BYPASS, 1);
+ WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, PRESCALE_OVL_CONTROL, OVL_PRESCALE_BYPASS, 1);
+ WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, GRPH_INPUT_GAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, INPUT_GAMMA_CONTROL, OVL_INPUT_GAMMA_MODE, 0);
+ WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
+
+ WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
+
+ WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
+ r = crtc->gamma_store;
+ g = r + crtc->gamma_size;
+ b = g + crtc->gamma_size;
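+ /* pack the 16-bit gamma entries into the 10:10:10 DC_LUT_30_COLOR format */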
+ for (i = 0; i < 256; i++) {
+ WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
+ ((*r++ & 0xffc0) << 14) |
+ ((*g++ & 0xffc0) << 4) |
+ (*b++ >> 6));
+ }
+
+ tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, GRPH_DEGAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, OVL_DEGAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, DEGAMMA_CONTROL, CURSOR_DEGAMMA_MODE, 0);
+ WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, GRPH_GAMUT_REMAP_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GAMUT_REMAP_CONTROL, OVL_GAMUT_REMAP_MODE, 0);
+ WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, GRPH_REGAMMA_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, REGAMMA_CONTROL, OVL_REGAMMA_MODE, 0);
+ WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ tmp = RREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_GRPH_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, OUTPUT_CSC_CONTROL, OUTPUT_CSC_OVL_MODE, 0);
+ WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+
+ /* XXX match this to the depth of the crtc fmt block, move to modeset? */
+ WREG32(mmDENORM_CONTROL + amdgpu_crtc->crtc_offset, 0);
+ /* XXX this only needs to be programmed once per crtc at startup,
+ * not sure where the best place for it is
+ */
+ tmp = RREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, ALPHA_CONTROL, CURSOR_ALPHA_BLND_ENA, 1);
+ WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static int dce_v10_0_pick_dig_encoder(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
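+ /* each UNIPHY block drives two links (A/B), mapping to DIG0-DIG5;
+ * UNIPHY3 only provides one, DIG6
+ */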
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ return 1;
+ else
+ return 0;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ return 3;
+ else
+ return 2;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ return 5;
+ else
+ return 4;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return 6;
+ default:
+ DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
+ return 0;
+ }
+}
+
+/**
+ * dce_v10_0_pick_pll - Allocate a PPLL for use by the crtc.
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
+ * a single PPLL can be used for all DP crtcs/encoders. For non-DP
+ * monitors a dedicated PPLL must be used. If a particular board has
+ * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
+ * as there is no need to program the PLL itself. If we are not able to
+ * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
+ * avoid messing up an existing monitor.
+ *
+ * Asic specific PLL information
+ *
+ * DCE 10.x
+ * Tonga
+ * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
+ * CI
+ * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
+ *
+ */
+static u32 dce_v10_0_pick_pll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 pll_in_use;
+ int pll;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
+ if (adev->clock.dp_extclk)
+ /* skip PPLL programming if using ext clock */
+ return ATOM_PPLL_INVALID;
+ else {
+ /* use the same PPLL for all DP monitors */
+ pll = amdgpu_pll_get_shared_dp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+ } else {
+ /* use the same PPLL for all monitors with the same clock */
+ pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+
+ /* DCE10 has PPLL0, PPLL1, and PPLL2 */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ if (!(pll_in_use & (1 << ATOM_PPLL0)))
+ return ATOM_PPLL0;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+}
+
+static void dce_v10_0_lock_cursor(struct drm_crtc *crtc, bool lock)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint32_t cur_lock;
+
+ cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
+ if (lock)
+ cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 1);
+ else
+ cur_lock = REG_SET_FIELD(cur_lock, CUR_UPDATE, CURSOR_UPDATE_LOCK, 0);
+ WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
+}
+
+static void dce_v10_0_hide_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ u32 tmp;
+
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 0);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static void dce_v10_0_show_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ u32 tmp;
+
+ WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(amdgpu_crtc->cursor_addr));
+ WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(amdgpu_crtc->cursor_addr));
+
+ tmp = RREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CUR_CONTROL, CURSOR_MODE, 2);
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+static int dce_v10_0_cursor_move_locked(struct drm_crtc *crtc,
+ int x, int y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ int xorigin = 0, yorigin = 0;
+
+ amdgpu_crtc->cursor_x = x;
+ amdgpu_crtc->cursor_y = y;
+
+ /* avivo cursors are offset into the total surface */
+ x += crtc->x;
+ y += crtc->y;
+ DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
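+ /* if the cursor hangs off the top/left edge, clamp the position to 0
+ * and move the hotspot instead
+ */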
+ if (x < 0) {
+ xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
+ x = 0;
+ }
+ if (y < 0) {
+ yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
+ y = 0;
+ }
+
+ WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
+ WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
+ WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+ ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
+
+ return 0;
+}
+
+static int dce_v10_0_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int ret;
+
+ dce_v10_0_lock_cursor(crtc, true);
+ ret = dce_v10_0_cursor_move_locked(crtc, x, y);
+ dce_v10_0_lock_cursor(crtc, false);
+
+ return ret;
+}
+
+static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *aobj;
+ int ret;
+
+ if (!handle) {
+ /* turn off cursor */
+ dce_v10_0_hide_cursor(crtc);
+ obj = NULL;
+ goto unpin;
+ }
+
+ if ((width > amdgpu_crtc->max_cursor_width) ||
+ (height > amdgpu_crtc->max_cursor_height)) {
+ DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
+ return -ENOENT;
+ }
+
+ aobj = gem_to_amdgpu_bo(obj);
+ ret = amdgpu_bo_reserve(aobj, false);
+ if (ret != 0) {
+ drm_gem_object_put(obj);
+ return ret;
+ }
+
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_bo_unreserve(aobj);
+ if (ret) {
+ DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
+ drm_gem_object_put(obj);
+ return ret;
+ }
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+
+ dce_v10_0_lock_cursor(crtc, true);
+
+ if (width != amdgpu_crtc->cursor_width ||
+ height != amdgpu_crtc->cursor_height ||
+ hot_x != amdgpu_crtc->cursor_hot_x ||
+ hot_y != amdgpu_crtc->cursor_hot_y) {
+ int x, y;
+
+ x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
+ y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
+
+ dce_v10_0_cursor_move_locked(crtc, x, y);
+
+ amdgpu_crtc->cursor_width = width;
+ amdgpu_crtc->cursor_height = height;
+ amdgpu_crtc->cursor_hot_x = hot_x;
+ amdgpu_crtc->cursor_hot_y = hot_y;
+ }
+
+ dce_v10_0_show_cursor(crtc);
+ dce_v10_0_lock_cursor(crtc, false);
+
+unpin:
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ ret = amdgpu_bo_reserve(aobj, true);
+ if (likely(ret == 0)) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ drm_gem_object_put(amdgpu_crtc->cursor_bo);
+ }
+
+ amdgpu_crtc->cursor_bo = obj;
+ return 0;
+}
+
+static void dce_v10_0_cursor_reset(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo) {
+ dce_v10_0_lock_cursor(crtc, true);
+
+ dce_v10_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
+ amdgpu_crtc->cursor_y);
+
+ dce_v10_0_show_cursor(crtc);
+
+ dce_v10_0_lock_cursor(crtc, false);
+ }
+}
+
+static int dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ dce_v10_0_crtc_load_lut(crtc);
+
+ return 0;
+}
+
+static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(amdgpu_crtc);
+}
+
+static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = {
+ .cursor_set2 = dce_v10_0_crtc_cursor_set2,
+ .cursor_move = dce_v10_0_crtc_cursor_move,
+ .gamma_set = dce_v10_0_crtc_gamma_set,
+ .set_config = amdgpu_display_crtc_set_config,
+ .destroy = dce_v10_0_crtc_destroy,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
+ .get_vblank_counter = amdgpu_get_vblank_counter_kms,
+ .enable_vblank = amdgpu_enable_vblank_kms,
+ .disable_vblank = amdgpu_disable_vblank_kms,
+ .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
+};
+
+static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ unsigned type;
+
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_crtc->enabled = true;
+ amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
+ dce_v10_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
+ dce_v10_0_vga_enable(crtc, false);
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
+ amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
+ drm_crtc_vblank_on(crtc);
+ dce_v10_0_crtc_load_lut(crtc);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ drm_crtc_vblank_off(crtc);
+ if (amdgpu_crtc->enabled) {
+ dce_v10_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
+ dce_v10_0_vga_enable(crtc, false);
+ }
+ amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
+ amdgpu_crtc->enabled = false;
+ break;
+ }
+ /* adjust pm to dpms */
+ amdgpu_dpm_compute_clocks(adev);
+}
+
+static void dce_v10_0_crtc_prepare(struct drm_crtc *crtc)
+{
+ /* disable crtc pair power gating before programming */
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
+ dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void dce_v10_0_crtc_commit(struct drm_crtc *crtc)
+{
+ dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
+}
+
+static void dce_v10_0_crtc_disable(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_atom_ss ss;
+ int i;
+
+ dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+ if (crtc->primary->fb) {
+ int r;
+ struct amdgpu_bo *abo;
+
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r))
+ DRM_ERROR("failed to reserve abo before unpin\n");
+ else {
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+ }
+ /* disable the GRPH */
+ dce_v10_0_grph_enable(crtc, false);
+
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != amdgpu_crtc->crtc_id &&
+ amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* another crtc is using this pll; don't turn
+ * off the pll
+ */
+ goto done;
+ }
+ }
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL0:
+ case ATOM_PPLL1:
+ case ATOM_PPLL2:
+ /* disable the ppll */
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ default:
+ break;
+ }
+done:
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+}
+
+static int dce_v10_0_crtc_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!amdgpu_crtc->adjusted_clock)
+ return -EINVAL;
+
+ amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
+ amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
+ dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+ amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
+ amdgpu_atombios_crtc_scaler_setup(crtc);
+ dce_v10_0_cursor_reset(crtc);
+ /* update the hw mode for dpm */
+ amdgpu_crtc->hw_mode = *adjusted_mode;
+
+ return 0;
+}
+
+static bool dce_v10_0_crtc_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+
+ /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc == crtc) {
+ amdgpu_crtc->encoder = encoder;
+ amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
+ break;
+ }
+ }
+ if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ return false;
+ }
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ return false;
+ if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
+ return false;
+ /* pick pll */
+ amdgpu_crtc->pll_id = dce_v10_0_pick_pll(crtc);
+ /* if we can't get a PPLL for a non-DP encoder, fail */
+ if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
+ return false;
+
+ return true;
+}
+
+static int dce_v10_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)
+{
+ return dce_v10_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+}
+
+static int dce_v10_0_crtc_set_base_atomic(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, enum mode_set_atomic state)
+{
+ return dce_v10_0_crtc_do_set_base(crtc, fb, x, y, 1);
+}
+
+static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = {
+ .dpms = dce_v10_0_crtc_dpms,
+ .mode_fixup = dce_v10_0_crtc_mode_fixup,
+ .mode_set = dce_v10_0_crtc_mode_set,
+ .mode_set_base = dce_v10_0_crtc_set_base,
+ .mode_set_base_atomic = dce_v10_0_crtc_set_base_atomic,
+ .prepare = dce_v10_0_crtc_prepare,
+ .commit = dce_v10_0_crtc_commit,
+ .disable = dce_v10_0_crtc_disable,
+ .get_scanout_position = amdgpu_crtc_get_scanout_position,
+};
+
+static void dce_v10_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+}
+
+static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v10_0_panic_flush,
+};
+
+static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
+{
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
+ (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+ if (amdgpu_crtc == NULL)
+ return -ENOMEM;
+
+ drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v10_0_crtc_funcs);
+
+ drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
+ amdgpu_crtc->crtc_id = index;
+ adev->mode_info.crtcs[index] = amdgpu_crtc;
+
+ amdgpu_crtc->max_cursor_width = 128;
+ amdgpu_crtc->max_cursor_height = 128;
+ adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
+ adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
+
+ switch (amdgpu_crtc->crtc_id) {
+ case 0:
+ default:
+ amdgpu_crtc->crtc_offset = CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ amdgpu_crtc->crtc_offset = CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ amdgpu_crtc->crtc_offset = CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ amdgpu_crtc->crtc_offset = CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ amdgpu_crtc->crtc_offset = CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ amdgpu_crtc->crtc_offset = CRTC5_REGISTER_OFFSET;
+ break;
+ }
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs);
+
+ return 0;
+}
+
+static int dce_v10_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg;
+ adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg;
+
+ dce_v10_0_set_display_funcs(adev);
+
+ adev->mode_info.num_crtc = dce_v10_0_get_num_crtc(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_FIJI:
+ case CHIP_TONGA:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 7;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ dce_v10_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int dce_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ if (r)
+ return r;
+ }
+
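+ /* pageflip sources come one per crtc, every other src id starting at
+ * D1_GRPH_PFLIP
+ */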
+ for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ if (r)
+ return r;
+ }
+
+ /* HPD hotplug */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
+
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ /* allocate crtcs */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = dce_v10_0_crtc_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
+ amdgpu_display_print_display_setup(adev_to_drm(adev));
+ else
+ return -EINVAL;
+
+ /* setup afmt */
+ r = dce_v10_0_afmt_init(adev);
+ if (r)
+ return r;
+
+ r = dce_v10_0_audio_init(adev);
+ if (r)
+ return r;
+
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
+}
+
+static int dce_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+
+ dce_v10_0_audio_fini(adev);
+
+ dce_v10_0_afmt_fini(adev);
+
+ drm_mode_config_cleanup(adev_to_drm(adev));
+ adev->mode_info.mode_config_initialized = false;
+
+ return 0;
+}
+
+static int dce_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ dce_v10_0_init_golden_registers(adev);
+
+ /* disable vga render */
+ dce_v10_0_set_vga_render_state(adev, false);
+ /* init dig PHYs, disp eng pll */
+ amdgpu_atombios_encoder_init_dig(adev);
+ amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
+
+ /* initialize hpd */
+ dce_v10_0_hpd_init(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v10_0_pageflip_interrupt_init(adev);
+
+ return 0;
+}
+
+static int dce_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ dce_v10_0_hpd_fini(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v10_0_pageflip_interrupt_fini(adev);
+
+ flush_delayed_work(&adev->hotplug_work);
+
+ return 0;
+}
+
+static int dce_v10_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
+ return dce_v10_0_hw_fini(ip_block);
+}
+
+static int dce_v10_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
+ ret = dce_v10_0_hw_init(ip_block);
+
+ /* turn on the BL */
+ if (adev->mode_info.bl_encoder) {
+ u8 bl_level = amdgpu_display_backlight_get_level(adev,
+ adev->mode_info.bl_encoder);
+ amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
+ bl_level);
+ }
+ if (ret)
+ return ret;
+
+ return amdgpu_display_resume_helper(adev);
+}
+
+static bool dce_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static bool dce_v10_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ return dce_v10_0_is_display_hung(adev);
+}
+
+static int dce_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (dce_v10_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void dce_v10_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VBLANK_INTERRUPT_MASK, 0);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VBLANK_INTERRUPT_MASK, 1);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dce_v10_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VLINE_INTERRUPT_MASK, 0);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc]);
+ lb_interrupt_mask = REG_SET_FIELD(lb_interrupt_mask, LB_INTERRUPT_MASK,
+ VLINE_INTERRUPT_MASK, 1);
+ WREG32(mmLB_INTERRUPT_MASK + crtc_offsets[crtc], lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned hpd,
+ enum amdgpu_interrupt_state state)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return 0;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 0);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_EN, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v10_0_set_crtc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CRTC_IRQ_VBLANK1:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK2:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK3:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK4:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK5:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK6:
+ dce_v10_0_set_crtc_vblank_interrupt_state(adev, 5, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE1:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE2:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE3:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE4:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE5:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE6:
+ dce_v10_0_set_crtc_vline_interrupt_state(adev, 5, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int dce_v10_0_set_pageflip_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg;
+
+ if (type >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", type);
+ return -EINVAL;
+ }
+
+ reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
+ if (state == AMDGPU_IRQ_STATE_DISABLE)
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+ else
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+
+ return 0;
+}
+
+static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned long flags;
+ unsigned crtc_id;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_flip_work *works;
+
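+ /* pageflip src ids start at D1 (8) and come in pairs per crtc */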
+ crtc_id = (entry->src_id - 8) >> 1;
+ amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+
+ if (crtc_id >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
+ return -EINVAL;
+ }
+
+ if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
+ WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
+
+ /* the IRQ can fire during early init, before the crtc is set up */
+ if (amdgpu_crtc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ works = amdgpu_crtc->pflip_works;
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
+ "AMDGPU_FLIP_SUBMITTED(%d)\n",
+ amdgpu_crtc->pflip_status,
+ AMDGPU_FLIP_SUBMITTED);
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ return 0;
+ }
+
+ /* page flip completed. clean up */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
+ amdgpu_crtc->pflip_works = NULL;
+
+ /* wake up userspace */
+ if (works->event)
+ drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
+
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+
+ drm_crtc_vblank_put(&amdgpu_crtc->base);
+ schedule_work(&works->unpin_work);
+
+ return 0;
+}
+
+static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd]);
+ tmp = REG_SET_FIELD(tmp, DC_HPD_INT_CONTROL, DC_HPD_INT_ACK, 1);
+ WREG32(mmDC_HPD_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+static void dce_v10_0_crtc_vblank_int_ack(struct amdgpu_device *adev,
+ int crtc)
+{
+ u32 tmp;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ tmp = RREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc]);
+ tmp = REG_SET_FIELD(tmp, LB_VBLANK_STATUS, VBLANK_ACK, 1);
+ WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], tmp);
+}
+
+static void dce_v10_0_crtc_vline_int_ack(struct amdgpu_device *adev,
+ int crtc)
+{
+ u32 tmp;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ tmp = RREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc]);
+ tmp = REG_SET_FIELD(tmp, LB_VLINE_STATUS, VLINE_ACK, 1);
+ WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], tmp);
+}
+
+static int dce_v10_0_crtc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned crtc = entry->src_id - 1;
+ uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, crtc);
+
+ switch (entry->src_data[0]) {
+ case 0: /* vblank */
+ if (disp_int & interrupt_status_offsets[crtc].vblank)
+ dce_v10_0_crtc_vblank_int_ack(adev, crtc);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ if (amdgpu_irq_enabled(adev, source, irq_type)) {
+ drm_handle_vblank(adev_to_drm(adev), crtc);
+ }
+ DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+
+ break;
+ case 1: /* vline */
+ if (disp_int & interrupt_status_offsets[crtc].vline)
+ dce_v10_0_crtc_vline_int_ack(adev, crtc);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+
+ break;
+ default:
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v10_0_hpd_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t disp_int, mask;
+ unsigned hpd;
+
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ return 0;
+ }
+
+ hpd = entry->src_data[0];
+ disp_int = RREG32(interrupt_status_offsets[hpd].reg);
+ mask = interrupt_status_offsets[hpd].hpd;
+
+ if (disp_int & mask) {
+ dce_v10_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
+ }
+
+ return 0;
+}
+
+static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
+ .name = "dce_v10_0",
+ .early_init = dce_v10_0_early_init,
+ .sw_init = dce_v10_0_sw_init,
+ .sw_fini = dce_v10_0_sw_fini,
+ .hw_init = dce_v10_0_hw_init,
+ .hw_fini = dce_v10_0_hw_fini,
+ .suspend = dce_v10_0_suspend,
+ .resume = dce_v10_0_resume,
+ .is_idle = dce_v10_0_is_idle,
+ .check_soft_reset = dce_v10_0_check_soft_reset,
+ .soft_reset = dce_v10_0_soft_reset,
+ .set_clockgating_state = dce_v10_0_set_clockgating_state,
+ .set_powergating_state = dce_v10_0_set_powergating_state,
+};
+
+static void
+dce_v10_0_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ amdgpu_encoder->pixel_clock = adjusted_mode->clock;
+
+ /* need to call this here rather than in prepare() since we need some crtc info */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ /* setting the scaler clears this on some chips */
+ dce_v10_0_set_interleave(encoder->crtc, mode);
+
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
+ dce_v10_0_afmt_enable(encoder, true);
+ dce_v10_0_afmt_setmode(encoder, adjusted_mode);
+ }
+}
+
+static void dce_v10_0_encoder_prepare(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if ((amdgpu_encoder->active_device &
+ (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig) {
+ dig->dig_encoder = dce_v10_0_pick_dig_encoder(encoder);
+ if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
+ dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
+ }
+ }
+
+ amdgpu_atombios_scratch_regs_lock(adev, true);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* select the clock/data port if it uses a router */
+ if (amdgpu_connector->router.cd_valid)
+ amdgpu_i2c_router_select_cd_port(amdgpu_connector);
+
+ /* turn eDP panel on for mode set */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ }
+
+ /* this is needed for the pll/ss setup to work correctly in some cases */
+ amdgpu_atombios_encoder_set_crtc_source(encoder);
+ /* set up the FMT blocks */
+ dce_v10_0_program_fmt(encoder);
+}
+
+static void dce_v10_0_encoder_commit(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* need to call this here as we need the crtc set up */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_scratch_regs_lock(adev, false);
+}
+
+static void dce_v10_0_encoder_disable(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig;
+
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ if (amdgpu_atombios_encoder_is_digital(encoder)) {
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
+ dce_v10_0_afmt_enable(encoder, false);
+ dig = amdgpu_encoder->enc_priv;
+ dig->dig_encoder = -1;
+ }
+ amdgpu_encoder->active_device = 0;
+}
+
+/* these are handled by the primary encoders */
+static void dce_v10_0_ext_prepare(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v10_0_ext_commit(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v10_0_ext_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+}
+
+static void dce_v10_0_ext_disable(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v10_0_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+
+}
+
+static const struct drm_encoder_helper_funcs dce_v10_0_ext_helper_funcs = {
+ .dpms = dce_v10_0_ext_dpms,
+ .prepare = dce_v10_0_ext_prepare,
+ .mode_set = dce_v10_0_ext_mode_set,
+ .commit = dce_v10_0_ext_commit,
+ .disable = dce_v10_0_ext_disable,
+ /* no detect for TMDS/LVDS yet */
+};
+
+static const struct drm_encoder_helper_funcs dce_v10_0_dig_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v10_0_encoder_prepare,
+ .mode_set = dce_v10_0_encoder_mode_set,
+ .commit = dce_v10_0_encoder_commit,
+ .disable = dce_v10_0_encoder_disable,
+ .detect = amdgpu_atombios_encoder_dig_detect,
+};
+
+static const struct drm_encoder_helper_funcs dce_v10_0_dac_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v10_0_encoder_prepare,
+ .mode_set = dce_v10_0_encoder_mode_set,
+ .commit = dce_v10_0_encoder_commit,
+ .detect = amdgpu_atombios_encoder_dac_detect,
+};
+
+static void dce_v10_0_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
+ kfree(amdgpu_encoder->enc_priv);
+ drm_encoder_cleanup(encoder);
+ kfree(amdgpu_encoder);
+}
+
+static const struct drm_encoder_funcs dce_v10_0_encoder_funcs = {
+ .destroy = dce_v10_0_encoder_destroy,
+};
+
+static void dce_v10_0_encoder_add(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ /* see if we already added it */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->encoder_enum == encoder_enum) {
+ amdgpu_encoder->devices |= supported_device;
+ return;
+ }
+
+ }
+
+ /* add a new one */
+ amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
+ if (!amdgpu_encoder)
+ return;
+
+ encoder = &amdgpu_encoder->base;
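+ /* possible_crtcs is a bitmask with one bit per CRTC the encoder can
+ * be routed to, e.g. six CRTCs -> 0x3f.
+ */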
+ switch (adev->mode_info.num_crtc) {
+ case 1:
+ encoder->possible_crtcs = 0x1;
+ break;
+ case 2:
+ default:
+ encoder->possible_crtcs = 0x3;
+ break;
+ case 4:
+ encoder->possible_crtcs = 0xf;
+ break;
+ case 6:
+ encoder->possible_crtcs = 0x3f;
+ break;
+ }
+
+ amdgpu_encoder->enc_priv = NULL;
+
+ amdgpu_encoder->encoder_enum = encoder_enum;
+ amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ amdgpu_encoder->devices = supported_device;
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
+ amdgpu_encoder->is_ext_encoder = false;
+ amdgpu_encoder->caps = caps;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ drm_encoder_helper_add(encoder, &dce_v10_0_dac_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_encoder->rmx_type = RMX_FULL;
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ } else {
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ }
+ drm_encoder_helper_add(encoder, &dce_v10_0_dig_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_SI170B:
+ case ENCODER_OBJECT_ID_CH7303:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
+ case ENCODER_OBJECT_ID_TITFP513:
+ case ENCODER_OBJECT_ID_VT1623:
+ case ENCODER_OBJECT_ID_HDMI_SI1930:
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ /* these are handled by the primary encoders */
+ amdgpu_encoder->is_ext_encoder = true;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ else
+ drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ drm_encoder_helper_add(encoder, &dce_v10_0_ext_helper_funcs);
+ break;
+ }
+}
+
+static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
+ .bandwidth_update = &dce_v10_0_bandwidth_update,
+ .vblank_get_counter = &dce_v10_0_vblank_get_counter,
+ .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
+ .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
+ .hpd_sense = &dce_v10_0_hpd_sense,
+ .hpd_set_polarity = &dce_v10_0_hpd_set_polarity,
+ .hpd_get_gpio_reg = &dce_v10_0_hpd_get_gpio_reg,
+ .page_flip = &dce_v10_0_page_flip,
+ .page_flip_get_scanoutpos = &dce_v10_0_crtc_get_scanoutpos,
+ .add_encoder = &dce_v10_0_encoder_add,
+ .add_connector = &amdgpu_connector_add,
+};
+
+static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev)
+{
+ adev->mode_info.funcs = &dce_v10_0_display_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs dce_v10_0_crtc_irq_funcs = {
+ .set = dce_v10_0_set_crtc_irq_state,
+ .process = dce_v10_0_crtc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v10_0_pageflip_irq_funcs = {
+ .set = dce_v10_0_set_pageflip_irq_state,
+ .process = dce_v10_0_pageflip_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v10_0_hpd_irq_funcs = {
+ .set = dce_v10_0_set_hpd_irq_state,
+ .process = dce_v10_0_hpd_irq,
+};
+
+static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.num_crtc > 0)
+ adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
+ else
+ adev->crtc_irq.num_types = 0;
+ adev->crtc_irq.funcs = &dce_v10_0_crtc_irq_funcs;
+
+ adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
+ adev->pageflip_irq.funcs = &dce_v10_0_pageflip_irq_funcs;
+
+ adev->hpd_irq.num_types = adev->mode_info.num_hpd;
+ adev->hpd_irq.funcs = &dce_v10_0_hpd_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version dce_v10_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &dce_v10_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v10_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 10,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &dce_v10_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
new file mode 100644
index 000000000000..7a0747789f1d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCE_V10_0_H__
+#define __DCE_V10_0_H__
+
+
+extern const struct amdgpu_ip_block_version dce_v10_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v10_1_ip_block;
+
+void dce_v10_0_disable_dce(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
new file mode 100644
index 000000000000..acc887a58518
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -0,0 +1,3577 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/pci.h>
+
+#include <drm/drm_edid.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_i2c.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_crtc.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+
+#include "dce_v6_0.h"
+#include "sid.h"
+
+#include "bif/bif_3_0_d.h"
+#include "bif/bif_3_0_sh_mask.h"
+
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+
+#include "gca/gfx_6_0_d.h"
+#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
+#include "gmc/gmc_6_0_d.h"
+#include "gmc/gmc_6_0_sh_mask.h"
+
+#include "dce/dce_6_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+
+#include "si_enums.h"
+
+static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev);
+static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static const u32 crtc_offsets[6] =
+{
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET
+};
+
+static const u32 hpd_offsets[] =
+{
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
+};
+
+static const uint32_t dig_offsets[] = {
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
+ (0x13830 - 0x7030) >> 2,
+};
+
+static const struct {
+ uint32_t reg;
+ uint32_t vblank;
+ uint32_t vline;
+ uint32_t hpd;
+
+} interrupt_status_offsets[6] = { {
+ .reg = mmDISP_INTERRUPT_STATUS,
+ .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
+} };
+
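+/* The audio (Azalia) endpoint registers are not memory mapped directly;
+ * they are reached through an index/data register pair, so accesses are
+ * serialized with a spinlock to keep the pair consistent.
+ */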
+static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+ return r;
+}
+
+static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset,
+ reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+}
+
+static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc >= adev->mode_info.num_crtc)
+ return 0;
+ else
+ return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
+}
+
+static void dce_v6_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v6_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
+/**
+ * dce_v6_0_page_flip - pageflip callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ * @async: asynchronous flip
+ *
+ * Does the actual pageflip (evergreen+): programs the new scanout address
+ * and pitch. Writing the low surface address register triggers the double
+ * buffered update, which the hardware latches at vsync (or at hsync when
+ * an async flip was requested).
+ */
+static void dce_v6_0_page_flip(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async)
+{
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
+
+ /* flip at hsync for async, default is vsync */
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ?
+ GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
+ /* update the scanout addresses */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)crtc_base);
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
+}
+
+static int dce_v6_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position)
+{
+ if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
+ return -EINVAL;
+
+ *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
+ *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
+
+ return 0;
+}
+
+/**
+ * dce_v6_0_hpd_sense - hpd sense callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (evergreen+).
+ * Returns true if connected, false if not connected.
+ */
+static bool dce_v6_0_hpd_sense(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ bool connected = false;
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return connected;
+
+ if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+ connected = true;
+
+ return connected;
+}
+
+/**
+ * dce_v6_0_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (evergreen+).
+ */
+static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ u32 tmp;
+ bool connected = dce_v6_0_hpd_sense(adev, hpd);
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return;
+
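+ /* Select the interrupt polarity so that the next state change
+ * (connect or disconnect) raises an HPD interrupt.
+ */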
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ if (connected)
+ tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+ else
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+/**
+ * dce_v6_0_hpd_init - hpd setup callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the hpd pins used by the card (evergreen+).
+ * Enable the pin, set the polarity, and enable the hpd interrupts.
+ */
+static void dce_v6_0_hpd_init(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp |= DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+ /* Don't enable hpd on eDP or LVDS to avoid breaking the
+ * aux dp channel on iMacs; this helps (but does not completely fix)
+ * https://bugzilla.redhat.com/show_bug.cgi?id=726143
+ * and also avoids interrupt storms during dpms.
+ */
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ continue;
+ }
+
+ dce_v6_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
+ dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+ amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+/**
+ * dce_v6_0_hpd_fini - hpd tear down callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the hpd pins used by the card (evergreen+).
+ * Disable the hpd interrupts.
+ */
+static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
+{
+ return mmDC_GPIO_HPD_A;
+}
+
+static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
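+ /* Snapshot the HV counter of every enabled CRTC, then poll for up to
+ * ~1 ms; a CRTC whose counter never advances is considered hung.
+ */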
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
+static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
+ bool render)
+{
+ if (!render)
+ WREG32(mmVGA_RENDER_CONTROL,
+ RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK);
+}
+
+static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ return 6;
+ case CHIP_OLAND:
+ return 2;
+ default:
+ return 0;
+ }
+}
+
+void dce_v6_0_disable_dce(struct amdgpu_device *adev)
+{
+ /* Disable VGA render and any enabled CRTCs, if the chip has a DCE engine */
+ if (amdgpu_atombios_has_dce_engine_info(adev)) {
+ u32 tmp;
+ int crtc_enabled, i;
+
+ dce_v6_0_set_vga_render_state(adev, false);
+
+ /* Disable CRTCs */
+ for (i = 0; i < dce_v6_0_get_num_crtc(adev); i++) {
+ crtc_enabled = RREG32(mmCRTC_CONTROL + crtc_offsets[i]) &
+ CRTC_CONTROL__CRTC_MASTER_EN_MASK;
+ if (crtc_enabled) {
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ tmp &= ~CRTC_CONTROL__CRTC_MASTER_EN_MASK;
+ WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
+ }
+ }
+ }
+}
+
+static void dce_v6_0_program_fmt(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ int bpc = 0;
+ u32 tmp = 0;
+ enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ bpc = amdgpu_connector_get_monitor_bpc(connector);
+ dither = amdgpu_connector->dither;
+ }
+
+ /* LVDS FMT is set up by atom */
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
+ return;
+
+ if (bpc == 0)
+ return;
+
+
+ switch (bpc) {
+ case 6:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK);
+ else
+ tmp |= FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK;
+ break;
+ case 8:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_RGB_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_DEPTH_MASK);
+ else
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_DEPTH_MASK);
+ break;
+ case 10:
+ default:
+ /* not needed */
+ break;
+ }
+
+ WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+/**
+ * si_get_number_of_dram_channels - get the number of dram channels
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the number of video ram channels (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the number of dram channels
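+ * The NOOFCHAN field is an encoded value rather than a raw count, e.g. a
+ * field value of 3 maps to 8 channels; the watermark code below treats
+ * each channel as 4 bytes wide.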
+ */
+static u32 si_get_number_of_dram_channels(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(mmMC_SHARED_CHMAP);
+
+ switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) {
+ case 0:
+ default:
+ return 1;
+ case 1:
+ return 2;
+ case 2:
+ return 4;
+ case 3:
+ return 8;
+ case 4:
+ return 3;
+ case 5:
+ return 6;
+ case 6:
+ return 10;
+ case 7:
+ return 12;
+ case 8:
+ return 16;
+ }
+}
+
+struct dce6_wm_params {
+ u32 dram_channels; /* number of dram channels */
+ u32 yclk; /* bandwidth per dram data pin in kHz */
+ u32 sclk; /* engine clock in kHz */
+ u32 disp_clk; /* display clock in kHz */
+ u32 src_width; /* viewport width */
+ u32 active_time; /* active display time in ns */
+ u32 blank_time; /* blank time in ns */
+ bool interlaced; /* mode is interlaced */
+ fixed20_12 vsc; /* vertical scale ratio */
+ u32 num_heads; /* number of active crtcs */
+ u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+ u32 lb_size; /* line buffer allocated to pipe */
+ u32 vtaps; /* vertical scaler taps */
+};
+
+/**
+ * dce_v6_0_dram_bandwidth - get the dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the raw dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth in MBytes/s
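+ * For example, with yclk = 1,000,000 kHz and 2 dram channels this comes
+ * to roughly 2 * 4 * 1000 * 0.7 = 5600 MBytes/s.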
+ */
+static u32 dce_v6_0_dram_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate raw DRAM Bandwidth */
+ fixed20_12 dram_efficiency; /* 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ dram_efficiency.full = dfixed_const(7);
+ dram_efficiency.full = dfixed_div(dram_efficiency, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_dram_bandwidth_for_display - get the dram bandwidth for display
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dram bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth for display in MBytes/s
+ */
+static u32 dce_v6_0_dram_bandwidth_for_display(struct dce6_wm_params *wm)
+{
+ /* Calculate DRAM Bandwidth and the part allocated to display. */
+ fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
+ disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_data_return_bandwidth - get the data return bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the data return bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the data return bandwidth in MBytes/s
+ */
+static u32 dce_v6_0_data_return_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate the display Data return Bandwidth */
+ fixed20_12 return_efficiency; /* 0.8 */
+ fixed20_12 sclk, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ sclk.full = dfixed_const(wm->sclk);
+ sclk.full = dfixed_div(sclk, a);
+ a.full = dfixed_const(10);
+ return_efficiency.full = dfixed_const(8);
+ return_efficiency.full = dfixed_div(return_efficiency, a);
+ a.full = dfixed_const(32);
+ bandwidth.full = dfixed_mul(a, sclk);
+ bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_dmif_request_bandwidth - get the dmif bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dmif bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dmif bandwidth in MBytes/s
+ */
+static u32 dce_v6_0_dmif_request_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate the DMIF Request Bandwidth */
+ fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+ fixed20_12 disp_clk, bandwidth;
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(1000);
+ disp_clk.full = dfixed_const(wm->disp_clk);
+ disp_clk.full = dfixed_div(disp_clk, a);
+ a.full = dfixed_const(32);
+ b.full = dfixed_mul(a, disp_clk);
+
+ a.full = dfixed_const(10);
+ disp_clk_request_efficiency.full = dfixed_const(8);
+ disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+ bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_available_bandwidth - get the min available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the min available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the min available bandwidth in MBytes/s
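+ * For example, with sclk = 800,000 kHz the data return path allows about
+ * 32 * 800 * 0.8 = 20480 MBytes/s, a 148,500 kHz display clock allows about
+ * 32 * 148.5 * 0.8 = 3800 MBytes/s through the DMIF, and together with the
+ * 5600 MBytes/s of raw dram bandwidth from the example above the minimum,
+ * and hence the available bandwidth, is roughly 3800 MBytes/s.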
+ */
+static u32 dce_v6_0_available_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
+ u32 dram_bandwidth = dce_v6_0_dram_bandwidth(wm);
+ u32 data_return_bandwidth = dce_v6_0_data_return_bandwidth(wm);
+ u32 dmif_req_bandwidth = dce_v6_0_dmif_request_bandwidth(wm);
+
+ return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+/**
+ * dce_v6_0_average_bandwidth - get the average available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the average available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the average available bandwidth in MBytes/s
+ */
+static u32 dce_v6_0_average_bandwidth(struct dce6_wm_params *wm)
+{
+ /* Calculate the display mode Average Bandwidth
+ * DisplayMode should contain the source and destination dimensions,
+ * timing, etc.
+ */
+ fixed20_12 bpp;
+ fixed20_12 line_time;
+ fixed20_12 src_width;
+ fixed20_12 bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+ line_time.full = dfixed_div(line_time, a);
+ bpp.full = dfixed_const(wm->bytes_per_pixel);
+ src_width.full = dfixed_const(wm->src_width);
+ bandwidth.full = dfixed_mul(src_width, bpp);
+ bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+ bandwidth.full = dfixed_div(bandwidth, line_time);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v6_0_latency_watermark - get the latency watermark
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the latency watermark (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the latency watermark in ns
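+ * As a rough example, with ~3800 MBytes/s of available bandwidth, a single
+ * head and a 148,500 kHz display clock: the worst chunk return time is about
+ * 4,096,000 / 3800 = 1080 ns, the cursor line pair return time about
+ * 512,000 / 3800 = 135 ns and the dc pipe latency about 40,000,000 / 148,500
+ * = 269 ns, giving a watermark of roughly 2000 + (2 * 1080 + 135) + 269 ns,
+ * i.e. about 4.6 us, before any line fill time correction.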
+ */
+static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm)
+{
+ /* First calculate the latency in ns */
+ u32 mc_latency = 2000; /* 2000 ns. */
+ u32 available_bandwidth = dce_v6_0_available_bandwidth(wm);
+ u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+ u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
+ u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+ u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+ (wm->num_heads * cursor_line_pair_return_time);
+ u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+ u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+ u32 tmp, dmif_size = 12288;
+ fixed20_12 a, b, c;
+
+ if (wm->num_heads == 0)
+ return 0;
+
+ a.full = dfixed_const(2);
+ b.full = dfixed_const(1);
+ if ((wm->vsc.full > a.full) ||
+ ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+ (wm->vtaps >= 5) ||
+ ((wm->vsc.full >= a.full) && wm->interlaced))
+ max_src_lines_per_dst_line = 4;
+ else
+ max_src_lines_per_dst_line = 2;
+
+ a.full = dfixed_const(available_bandwidth);
+ b.full = dfixed_const(wm->num_heads);
+ a.full = dfixed_div(a, b);
+ tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+ tmp = min(dfixed_trunc(a), tmp);
+
+ lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
+
+ a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+ b.full = dfixed_const(1000);
+ c.full = dfixed_const(lb_fill_bw);
+ b.full = dfixed_div(c, b);
+ a.full = dfixed_div(a, b);
+ line_fill_time = dfixed_trunc(a);
+
+ if (line_fill_time < wm->active_time)
+ return latency;
+ else
+ return latency + (line_fill_time - wm->active_time);
+
+}
+
+/**
+ * dce_v6_0_average_bandwidth_vs_dram_bandwidth_for_display - check
+ * average and available dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v6_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
+{
+ if (dce_v6_0_average_bandwidth(wm) <=
+ (dce_v6_0_dram_bandwidth_for_display(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v6_0_average_bandwidth_vs_available_bandwidth - check
+ * average and available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * available bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v6_0_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
+{
+ if (dce_v6_0_average_bandwidth(wm) <=
+ (dce_v6_0_available_bandwidth(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v6_0_check_latency_hiding - check latency hiding
+ *
+ * @wm: watermark calculation data
+ *
+ * Check latency hiding (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v6_0_check_latency_hiding(struct dce6_wm_params *wm)
+{
+ u32 lb_partitions = wm->lb_size / wm->src_width;
+ u32 line_time = wm->active_time + wm->blank_time;
+ u32 latency_tolerant_lines;
+ u32 latency_hiding;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1);
+ if (wm->vsc.full > a.full)
+ latency_tolerant_lines = 1;
+ else {
+ if (lb_partitions <= (wm->vtaps + 1))
+ latency_tolerant_lines = 1;
+ else
+ latency_tolerant_lines = 2;
+ }
+
+ latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+ if (dce_v6_0_latency_watermark(wm) <= latency_hiding)
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v6_0_program_watermarks - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @lb_size: line buffer size
+ * @num_heads: number of display controllers in use
+ *
+ * Calculate and program the display watermarks for the
+ * selected display controller (CIK).
+ */
+static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ u32 lb_size, u32 num_heads)
+{
+ struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
+ struct dce6_wm_params wm_low, wm_high;
+ u32 dram_channels;
+ u32 active_time;
+ u32 line_time = 0;
+ u32 latency_watermark_a = 0, latency_watermark_b = 0;
+ u32 priority_a_mark = 0, priority_b_mark = 0;
+ u32 priority_a_cnt = PRIORITY_OFF;
+ u32 priority_b_cnt = PRIORITY_OFF;
+ u32 tmp, arb_control3, lb_vblank_lead_lines = 0;
+ fixed20_12 a, b, c;
+
+ if (amdgpu_crtc->base.enabled && num_heads && mode) {
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min_t(u32, line_time, 65535);
+ priority_a_cnt = 0;
+ priority_b_cnt = 0;
+
+ dram_channels = si_get_number_of_dram_channels(adev);
+
+ /* watermark for high clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_high.yclk =
+ amdgpu_dpm_get_mclk(adev, false) * 10;
+ wm_high.sclk =
+ amdgpu_dpm_get_sclk(adev, false) * 10;
+ } else {
+ wm_high.yclk = adev->pm.current_mclk * 10;
+ wm_high.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = active_time;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_high.interlaced = true;
+ wm_high.vsc = amdgpu_crtc->vsc;
+ wm_high.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = dram_channels;
+ wm_high.num_heads = num_heads;
+
+ /* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_low.yclk =
+ amdgpu_dpm_get_mclk(adev, true) * 10;
+ wm_low.sclk =
+ amdgpu_dpm_get_sclk(adev, true) * 10;
+ } else {
+ wm_low.yclk = adev->pm.current_mclk * 10;
+ wm_low.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = active_time;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = amdgpu_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = dram_channels;
+ wm_low.num_heads = num_heads;
+
+ /* set for high clocks */
+ latency_watermark_a = min_t(u32, dce_v6_0_latency_watermark(&wm_high), 65535);
+ /* set for low clocks */
+ latency_watermark_b = min_t(u32, dce_v6_0_latency_watermark(&wm_low), 65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v6_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce_v6_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce_v6_0_check_latency_hiding(&wm_high) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ priority_a_cnt |= PRIORITY_ALWAYS_ON;
+ priority_b_cnt |= PRIORITY_ALWAYS_ON;
+ }
+ if (!dce_v6_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce_v6_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce_v6_0_check_latency_hiding(&wm_low) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ priority_a_cnt |= PRIORITY_ALWAYS_ON;
+ priority_b_cnt |= PRIORITY_ALWAYS_ON;
+ }
+
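+ /* The priority mark is roughly the number of pixels scanned out
+ * during the latency period, in units of 16 pixels: latency (ns) *
+ * pixel clock (MHz) * hsc / 1000 / 16. E.g. ~4.6 us of latency at a
+ * 148.5 MHz pixel clock with no horizontal scaling is ~680 pixels,
+ * i.e. a mark of about 42.
+ */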
+ a.full = dfixed_const(1000);
+ b.full = dfixed_const(mode->clock);
+ b.full = dfixed_div(b, a);
+ c.full = dfixed_const(latency_watermark_a);
+ c.full = dfixed_mul(c, b);
+ c.full = dfixed_mul(c, amdgpu_crtc->hsc);
+ c.full = dfixed_div(c, a);
+ a.full = dfixed_const(16);
+ c.full = dfixed_div(c, a);
+ priority_a_mark = dfixed_trunc(c);
+ priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
+
+ a.full = dfixed_const(1000);
+ b.full = dfixed_const(mode->clock);
+ b.full = dfixed_div(b, a);
+ c.full = dfixed_const(latency_watermark_b);
+ c.full = dfixed_mul(c, b);
+ c.full = dfixed_mul(c, amdgpu_crtc->hsc);
+ c.full = dfixed_div(c, a);
+ a.full = dfixed_const(16);
+ c.full = dfixed_div(c, a);
+ priority_b_mark = dfixed_trunc(c);
+ priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
+ }
+
+ /* select wm A */
+ arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
+ tmp = arb_control3;
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
+ ((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
+ (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
+ /* select wm B */
+ tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
+ ((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
+ (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
+ /* restore original selection */
+ WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, arb_control3);
+
+ /* write the priority marks */
+ WREG32(mmPRIORITY_A_CNT + amdgpu_crtc->crtc_offset, priority_a_cnt);
+ WREG32(mmPRIORITY_B_CNT + amdgpu_crtc->crtc_offset, priority_b_cnt);
+
+ /* save values for DPM */
+ amdgpu_crtc->line_time = line_time;
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
+}
+
+/* watermark setup */
+/**
+ * dce_v6_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ * @other_mode: the display mode of another display controller
+ * that may be sharing the line buffer
+ *
+ * Set up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
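+ * For example, when both controllers of a pair are active each one gets
+ * half of the buffer (4096 * 2 = 8192 pixels), while a controller that has
+ * the pair to itself gets the whole buffer (8192 * 2 = 16384 pixels).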
+ */
+static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *other_mode)
+{
+ u32 tmp, buffer_alloc, i;
+ u32 pipe_offset = amdgpu_crtc->crtc_id * 0x8;
+ /*
+ * Line Buffer Setup
+ * There are 3 line buffers, each one shared by 2 display controllers.
+ * mmDC_LB_MEMORY_SPLIT controls how that line buffer is shared between
+ * the display controllers. The partitioning is done via one of four
+ * preset allocations specified in bits 21:20 (only two are used here):
+ * 0 - half lb
+ * 2 - whole lb, other crtc must be disabled
+ */
+ /* this can get tricky if we have two large displays on a paired group
+ * of crtcs. Ideally for multiple large displays we'd assign them to
+ * non-linked crtcs for maximum line buffer allocation.
+ */
+ if (amdgpu_crtc->base.enabled && mode) {
+ if (other_mode) {
+ tmp = 0; /* 1/2 */
+ buffer_alloc = 1;
+ } else {
+ tmp = 2; /* whole */
+ buffer_alloc = 2;
+ }
+ } else {
+ tmp = 0;
+ buffer_alloc = 0;
+ }
+
+ WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset,
+ (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT));
+
+ WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+ (buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+ PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATION_COMPLETED_MASK)
+ break;
+ udelay(1);
+ }
+
+ if (amdgpu_crtc->base.enabled && mode) {
+ switch (tmp) {
+ case 0:
+ default:
+ return 4096 * 2;
+ case 2:
+ return 8192 * 2;
+ }
+ }
+
+ /* controller not enabled, so no lb used */
+ return 0;
+}
+
+
+/**
+ * dce_v6_0_bandwidth_update - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calculate and program the display watermarks and line
+ * buffer allocation (CIK).
+ */
+static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev)
+{
+ struct drm_display_mode *mode0 = NULL;
+ struct drm_display_mode *mode1 = NULL;
+ u32 num_heads = 0, lb_size;
+ int i;
+
+ if (!adev->mode_info.mode_config_initialized)
+ return;
+
+ amdgpu_display_update_priority(adev);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i]->base.enabled)
+ num_heads++;
+ }
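+ /* CRTCs are handled in pairs because each pair shares one line buffer
+ * (see dce_v6_0_line_buffer_adjust).
+ */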
+ for (i = 0; i < adev->mode_info.num_crtc; i += 2) {
+ mode0 = &adev->mode_info.crtcs[i]->base.mode;
+ mode1 = &adev->mode_info.crtcs[i+1]->base.mode;
+ lb_size = dce_v6_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode0, mode1);
+ dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i], lb_size, num_heads);
+ lb_size = dce_v6_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i+1], mode1, mode0);
+ dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads);
+ }
+}
+
+static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev)
+{
+ int i;
+ u32 tmp;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+ if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT,
+ PORT_CONNECTIVITY))
+ adev->mode_info.audio.pin[i].connected = false;
+ else
+ adev->mode_info.audio.pin[i].connected = true;
+ }
+
+}
+
+static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *adev)
+{
+ int i;
+
+ dce_v6_0_audio_get_connected_pins(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ if (adev->mode_info.audio.pin[i].connected)
+ return &adev->mode_info.audio.pin[i];
+ }
+ DRM_ERROR("No connected audio pins found!\n");
+ return NULL;
+}
+
+static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset,
+ REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT,
+ dig->afmt->pin->id));
+}
+
+static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ int interlace = 0;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ interlace = 1;
+
+ if (connector->latency_present[interlace]) {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, connector->video_latency[interlace]);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, connector->audio_latency[interlace]);
+ } else {
+ tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ VIDEO_LIPSYNC, 0);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC,
+ AUDIO_LIPSYNC, 0);
+ }
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
+}
+
+static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u8 *sadb = NULL;
+ int sad_count;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
+ if (sad_count < 0) {
+ DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
+ }
+
+ /* program the speaker allocation */
+ tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ HDMI_CONNECTION, 0);
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ DP_CONNECTION, 0);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ DP_CONNECTION, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ HDMI_CONNECTION, 1);
+
+ if (sad_count)
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, sadb[0]);
+ else
+ tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER,
+ SPEAKER_ALLOCATION, 5); /* stereo */
+
+ WREG32_AUDIO_ENDPT(dig->afmt->pin->offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+ kfree(sadb);
+}
+
+static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct cea_sad *sads;
+ int i, sad_count;
+
+ static const u16 eld_reg_to_type[][2] = {
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+ };
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
+ if (sad_count < 0)
+ DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
+ return;
+
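+ /*
+ * For each coding type in eld_reg_to_type, program the matching
+ * audio descriptor register: PCM entries keep the SAD with the
+ * most channels and accumulate the stereo sample-rate bits, while
+ * other formats take the first matching SAD.
+ */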
+ for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+ u32 value = 0;
+ u8 stereo_freqs = 0;
+ int max_channels = -1;
+ int j;
+
+ for (j = 0; j < sad_count; j++) {
+ struct cea_sad *sad = &sads[j];
+
+ if (sad->format == eld_reg_to_type[i][1]) {
+ if (sad->channels > max_channels) {
+ value = (sad->channels <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
+ (sad->byte2 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
+ (sad->freq <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
+ max_channels = sad->channels;
+ }
+
+ if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+ stereo_freqs |= sad->freq;
+ else
+ break;
+ }
+ }
+
+ value |= (stereo_freqs <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT);
+
+ WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value);
+ }
+
+ kfree(sads);
+}
+
+static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
+ struct amdgpu_audio_pin *pin,
+ bool enable)
+{
+ if (!pin)
+ return;
+
+ WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+ enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
+}
+
+static const u32 pin_offsets[7] = {
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
+};
+
+static int dce_v6_0_audio_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return 0;
+
+ adev->mode_info.audio.enabled = true;
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ default:
+ adev->mode_info.audio.num_pins = 6;
+ break;
+ case CHIP_OLAND:
+ adev->mode_info.audio.num_pins = 2;
+ break;
+ }
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ adev->mode_info.audio.pin[i].channels = -1;
+ adev->mode_info.audio.pin[i].rate = -1;
+ adev->mode_info.audio.pin[i].bits_per_sample = -1;
+ adev->mode_info.audio.pin[i].status_bits = 0;
+ adev->mode_info.audio.pin[i].category_code = 0;
+ adev->mode_info.audio.pin[i].connected = false;
+ adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+ adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
+ dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ return 0;
+}
+
+static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_audio)
+ return;
+
+ if (!adev->mode_info.audio.enabled)
+ return;
+
+ adev->mode_info.audio.enabled = false;
+}
+
+static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1);
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder,
+ uint32_t clock, int bpc)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
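+ /*
+ * Background note, not derived from this file: HDMI audio clock
+ * regeneration follows 128 * f_audio = f_TMDS * N / CTS, and
+ * amdgpu_afmt_acr() is assumed to return the per-rate N/CTS pairs
+ * for 32, 44.1 and 48 kHz. For example, at 48 kHz on a 148.5 MHz
+ * TMDS clock the spec values are N = 6144 and CTS = 148500, since
+ * 148500000 * 6144 / (128 * 48000) = 148500.
+ */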
+ tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE,
+ bpc > 8 ? 0 : 1);
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz);
+ WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz);
+ WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz);
+ WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz);
+ WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz);
+ WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp);
+ tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz);
+ WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ struct hdmi_avi_infoframe frame;
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+ uint8_t *payload = buffer + 3;
+ uint8_t *header = buffer;
+ ssize_t err;
+ u32 tmp;
+
+ err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
+ if (err < 0) {
+ DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+ if (err < 0) {
+ DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset,
+ payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24));
+ WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset,
+ payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24));
+ WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset,
+ payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24));
+ WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset,
+ payload[0xC] | (payload[0xD] << 8) | (header[1] << 24));
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ /* anything other than 0 */
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1,
+ HDMI_AUDIO_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ u32 tmp;
+
+ /*
+ * Two DTOs: generally use dto0 for HDMI, dto1 for DP.
+ * Express [24MHz / target pixel clock] as an exact rational
+ * number (a ratio of two integers): DCCG_AUDIO_DTOx_PHASE is
+ * the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator.
+ */
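+ /*
+ * Rough illustration (values not taken from this driver): a
+ * 148.5 MHz pixel clock gives mode->clock == 148500 kHz, so the
+ * writes below program PHASE = 24000 and MODULE = 148500, i.e.
+ * exactly 24 MHz / 148.5 MHz = 24000 / 148500.
+ */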
+ tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE);
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+ DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id);
+ if (em == ATOM_ENCODER_MODE_HDMI) {
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+ DCCG_AUDIO_DTO_SEL, 0);
+ } else if (ENCODER_MODE_IS_DP(em)) {
+ tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE,
+ DCCG_AUDIO_DTO_SEL, 1);
+ }
+ WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp);
+ if (em == ATOM_ENCODER_MODE_HDMI) {
+ WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000);
+ WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock);
+ } else if (ENCODER_MODE_IS_DP(em)) {
+ WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000);
+ WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock);
+ }
+}
+
+static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+ WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1);
+ WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+ WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7);
+ tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+ WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3);
+ WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ tmp = RREG32(mmHDMI_GC + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 1 : 0);
+ WREG32(mmHDMI_GC + dig->afmt->offset, tmp);
+}
+
+static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2);
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+ } else {
+ tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0);
+ tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0);
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+ }
+}
+
+static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1);
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1);
+ WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp);
+
+ tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+ WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp);
+ } else {
+ WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0);
+ }
+}
+
+static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+ int bpc = 8;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (!dig->afmt->enabled)
+ return;
+
+ dig->afmt->pin = dce_v6_0_audio_get_pin(adev);
+ if (!dig->afmt->pin)
+ return;
+
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ /* disable audio before setting up hw */
+ dce_v6_0_audio_enable(adev, dig->afmt->pin, false);
+
+ dce_v6_0_audio_set_mute(encoder, true);
+ dce_v6_0_audio_write_speaker_allocation(encoder);
+ dce_v6_0_audio_write_sad_regs(encoder);
+ dce_v6_0_audio_write_latency_fields(encoder, mode);
+ if (em == ATOM_ENCODER_MODE_HDMI) {
+ dce_v6_0_audio_set_dto(encoder, mode->clock);
+ dce_v6_0_audio_set_vbi_packet(encoder);
+ dce_v6_0_audio_set_acr(encoder, mode->clock, bpc);
+ } else if (ENCODER_MODE_IS_DP(em)) {
+ dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10);
+ }
+ dce_v6_0_audio_set_packet(encoder);
+ dce_v6_0_audio_select_pin(encoder);
+ dce_v6_0_audio_set_avi_infoframe(encoder, mode);
+ dce_v6_0_audio_set_mute(encoder, false);
+ if (em == ATOM_ENCODER_MODE_HDMI) {
+ dce_v6_0_audio_hdmi_enable(encoder, true);
+ } else if (ENCODER_MODE_IS_DP(em)) {
+ dce_v6_0_audio_dp_enable(encoder, true);
+ }
+
+ /* enable audio after setting up hw */
+ dce_v6_0_audio_enable(adev, dig->afmt->pin, true);
+}
+
+static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (enable && dig->afmt->enabled)
+ return;
+
+ if (!enable && !dig->afmt->enabled)
+ return;
+
+ if (!enable && dig->afmt->pin) {
+ dce_v6_0_audio_enable(adev, dig->afmt->pin, false);
+ dig->afmt->pin = NULL;
+ }
+
+ dig->afmt->enabled = enable;
+
+ DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
+ enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
+}
+
+static int dce_v6_0_afmt_init(struct amdgpu_device *adev)
+{
+ int i, j;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++)
+ adev->mode_info.afmt[i] = NULL;
+
+ /* DCE6 has audio blocks tied to DIG encoders */
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
+ if (adev->mode_info.afmt[i]) {
+ adev->mode_info.afmt[i]->offset = dig_offsets[i];
+ adev->mode_info.afmt[i]->id = i;
+ } else {
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ DRM_ERROR("Out of memory allocating afmt table\n");
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void dce_v6_0_afmt_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ kfree(adev->mode_info.afmt[i]);
+ adev->mode_info.afmt[i] = NULL;
+ }
+}
+
+static const u32 vga_control_regs[6] = {
+ mmD1VGA_CONTROL,
+ mmD2VGA_CONTROL,
+ mmD3VGA_CONTROL,
+ mmD4VGA_CONTROL,
+ mmD5VGA_CONTROL,
+ mmD6VGA_CONTROL,
+};
+
+static void dce_v6_0_vga_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 vga_control;
+
+ vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | (enable ? 1 : 0));
+}
+
+static void dce_v6_0_grph_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, enable ? 1 : 0);
+}
+
+static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, int atomic)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_framebuffer *target_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *abo;
+ uint64_t fb_location, tiling_flags;
+ uint32_t fb_format, fb_pitch_pixels, pipe_config;
+ u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+ u32 viewport_w, viewport_h;
+ int r;
+ bool bypass_lut = false;
+
+ /* no fb bound */
+ if (!atomic && !crtc->primary->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ if (atomic)
+ target_fb = fb;
+ else
+ target_fb = crtc->primary->fb;
+
+ /* If atomic, assume fb object is pinned & idle & fenced and
+ * just update base pointers
+ */
+ obj = target_fb->obj[0];
+ abo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(abo, false);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(abo);
+ return -EINVAL;
+ }
+ }
+ fb_location = amdgpu_bo_gpu_offset(abo);
+
+ amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
+ amdgpu_bo_unreserve(abo);
+
+ switch (target_fb->format->format) {
+ case DRM_FORMAT_C8:
+ fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ break;
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_BGRA5551:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_RGB565:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_BGRA1010102:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
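+ /* cross the red and blue channels so XBGR/ABGR framebuffers scan out correctly */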
+ fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ default:
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
+ return -EINVAL;
+ }
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
+ unsigned bankw, bankh, mtaspect, tile_split, num_banks;
+
+ bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+ num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+ fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT);
+ fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT);
+ fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT);
+ fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT);
+ fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT);
+ } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
+ fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ }
+
+ pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+ fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT);
+
+ dce_v6_0_vga_enable(crtc, false);
+
+ /* Make sure surface address is updated at vertical blank rather than
+ * horizontal blank
+ */
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+ WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
+
+ /*
+ * The LUT only has 256 slots for indexing by an 8 bpc fb. Bypass the
+ * LUT for > 8 bpc scanout to avoid truncating fb indices to their
+ * 8 MSBs and to retain the full precision throughout the pipeline.
+ */
+ WREG32_P(mmGRPH_LUT_10BIT_BYPASS + amdgpu_crtc->crtc_offset,
+ (bypass_lut ? GRPH_LUT_10BIT_BYPASS__GRPH_LUT_10BIT_BYPASS_EN_MASK : 0),
+ ~GRPH_LUT_10BIT_BYPASS__GRPH_LUT_10BIT_BYPASS_EN_MASK);
+
+ if (bypass_lut)
+ DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
+
+ WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
+ WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
+
+ fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
+
+ dce_v6_0_grph_enable(crtc, true);
+
+ WREG32(mmDESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
+ target_fb->height);
+ x &= ~3;
+ y &= ~1;
+ WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
+ (x << 16) | y);
+ viewport_w = crtc->mode.hdisplay;
+ viewport_h = (crtc->mode.vdisplay + 1) & ~1;
+
+ WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
+ (viewport_w << 16) | viewport_h);
+
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
+
+ if (!atomic && fb && fb != crtc->primary->fb) {
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r != 0))
+ return r;
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+
+ /* Bytes per pixel may have changed */
+ dce_v6_0_bandwidth_update(adev);
+
+ return 0;
+}
+
+static void dce_v6_0_set_interleave(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset,
+ DATA_FORMAT__INTERLEAVE_EN_MASK);
+ else
+ WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
+}
+
+static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u16 *r, *g, *b;
+ int i;
+
+ DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
+
+ WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
+ ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
+ (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
+ WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
+ PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
+ WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
+ PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
+ WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
+ (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
+
+ WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
+
+ WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
+
+ WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
+ r = crtc->gamma_store;
+ g = r + crtc->gamma_size;
+ b = g + crtc->gamma_size;
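+ /*
+ * Pack each 16-bit gamma entry into the 10:10:10 DC_LUT_30_COLOR
+ * layout: the top 10 bits of red land in bits 29:20, green in
+ * bits 19:10 and blue in bits 9:0.
+ */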
+ for (i = 0; i < 256; i++) {
+ WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
+ ((*r++ & 0xffc0) << 14) |
+ ((*g++ & 0xffc0) << 4) |
+ (*b++ >> 6));
+ }
+
+ WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
+ WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
+ ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
+ (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
+ WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
+ (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
+ WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
+ ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
+ (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
+ /* XXX match this to the depth of the crtc fmt block, move to modeset? */
+ WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
+}
+
+static int dce_v6_0_pick_dig_encoder(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ return dig->linkb ? 1 : 0;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ return dig->linkb ? 3 : 2;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ return dig->linkb ? 5 : 4;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return 6;
+ default:
+ DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
+ return 0;
+ }
+}
+
+/**
+ * dce_v6_0_pick_pll - Allocate a PPLL for use by the crtc.
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
+ * a single PPLL can be used for all DP crtcs/encoders. For non-DP
+ * monitors a dedicated PPLL must be used. If a particular board has
+ * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
+ * as there is no need to program the PLL itself. If we are not able to
+ * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
+ * avoid messing up an existing monitor.
+ */
+static u32 dce_v6_0_pick_pll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 pll_in_use;
+ int pll;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
+ if (adev->clock.dp_extclk)
+ /* skip PPLL programming if using ext clock */
+ return ATOM_PPLL_INVALID;
+ else
+ return ATOM_PPLL0;
+ } else {
+ /* use the same PPLL for all monitors with the same clock */
+ pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+
+ /* PPLL1, and PPLL2 */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+}
+
+static void dce_v6_0_lock_cursor(struct drm_crtc *crtc, bool lock)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint32_t cur_lock;
+
+ cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
+ if (lock)
+ cur_lock |= CUR_UPDATE__CURSOR_UPDATE_LOCK_MASK;
+ else
+ cur_lock &= ~CUR_UPDATE__CURSOR_UPDATE_LOCK_MASK;
+ WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
+}
+
+static void dce_v6_0_hide_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+}
+
+static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+ WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(amdgpu_crtc->cursor_addr));
+ WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(amdgpu_crtc->cursor_addr));
+
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ CUR_CONTROL__CURSOR_EN_MASK |
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+}
+
+static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc,
+ int x, int y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ int xorigin = 0, yorigin = 0;
+
+ int w = amdgpu_crtc->cursor_width;
+
+ amdgpu_crtc->cursor_x = x;
+ amdgpu_crtc->cursor_y = y;
+
+ /* avivo cursors are offset into the total surface */
+ x += crtc->x;
+ y += crtc->y;
+ DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
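+ /*
+ * When the cursor hangs off the top or left edge the hardware
+ * position is clamped to 0 and the clipped amount is fed into
+ * CUR_HOT_SPOT so the visible portion stays aligned.
+ */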
+ if (x < 0) {
+ xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
+ x = 0;
+ }
+ if (y < 0) {
+ yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
+ y = 0;
+ }
+
+ WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
+ WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
+ WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+ ((w - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
+
+ return 0;
+}
+
+static int dce_v6_0_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int ret;
+
+ dce_v6_0_lock_cursor(crtc, true);
+ ret = dce_v6_0_cursor_move_locked(crtc, x, y);
+ dce_v6_0_lock_cursor(crtc, false);
+
+ return ret;
+}
+
+static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *aobj;
+ int ret;
+
+ if (!handle) {
+ /* turn off cursor */
+ dce_v6_0_hide_cursor(crtc);
+ obj = NULL;
+ goto unpin;
+ }
+
+ if ((width > amdgpu_crtc->max_cursor_width) ||
+ (height > amdgpu_crtc->max_cursor_height)) {
+ DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
+ return -ENOENT;
+ }
+
+ aobj = gem_to_amdgpu_bo(obj);
+ ret = amdgpu_bo_reserve(aobj, false);
+ if (ret != 0) {
+ drm_gem_object_put(obj);
+ return ret;
+ }
+
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_bo_unreserve(aobj);
+ if (ret) {
+ DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
+ drm_gem_object_put(obj);
+ return ret;
+ }
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+
+ dce_v6_0_lock_cursor(crtc, true);
+
+ if (width != amdgpu_crtc->cursor_width ||
+ height != amdgpu_crtc->cursor_height ||
+ hot_x != amdgpu_crtc->cursor_hot_x ||
+ hot_y != amdgpu_crtc->cursor_hot_y) {
+ int x, y;
+
+ x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
+ y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
+
+ dce_v6_0_cursor_move_locked(crtc, x, y);
+
+ amdgpu_crtc->cursor_width = width;
+ amdgpu_crtc->cursor_height = height;
+ amdgpu_crtc->cursor_hot_x = hot_x;
+ amdgpu_crtc->cursor_hot_y = hot_y;
+ }
+
+ dce_v6_0_show_cursor(crtc);
+ dce_v6_0_lock_cursor(crtc, false);
+
+unpin:
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ ret = amdgpu_bo_reserve(aobj, true);
+ if (likely(ret == 0)) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ drm_gem_object_put(amdgpu_crtc->cursor_bo);
+ }
+
+ amdgpu_crtc->cursor_bo = obj;
+ return 0;
+}
+
+static void dce_v6_0_cursor_reset(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo) {
+ dce_v6_0_lock_cursor(crtc, true);
+
+ dce_v6_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
+ amdgpu_crtc->cursor_y);
+
+ dce_v6_0_show_cursor(crtc);
+ dce_v6_0_lock_cursor(crtc, false);
+ }
+}
+
+static int dce_v6_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ dce_v6_0_crtc_load_lut(crtc);
+
+ return 0;
+}
+
+static void dce_v6_0_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(amdgpu_crtc);
+}
+
+static const struct drm_crtc_funcs dce_v6_0_crtc_funcs = {
+ .cursor_set2 = dce_v6_0_crtc_cursor_set2,
+ .cursor_move = dce_v6_0_crtc_cursor_move,
+ .gamma_set = dce_v6_0_crtc_gamma_set,
+ .set_config = amdgpu_display_crtc_set_config,
+ .destroy = dce_v6_0_crtc_destroy,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
+ .get_vblank_counter = amdgpu_get_vblank_counter_kms,
+ .enable_vblank = amdgpu_enable_vblank_kms,
+ .disable_vblank = amdgpu_disable_vblank_kms,
+ .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
+};
+
+static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ unsigned type;
+
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_crtc->enabled = true;
+ amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
+ amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
+ drm_crtc_vblank_on(crtc);
+ dce_v6_0_crtc_load_lut(crtc);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ drm_crtc_vblank_off(crtc);
+ if (amdgpu_crtc->enabled)
+ amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
+ amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
+ amdgpu_crtc->enabled = false;
+ break;
+ }
+ /* adjust pm to dpms */
+ amdgpu_dpm_compute_clocks(adev);
+}
+
+static void dce_v6_0_crtc_prepare(struct drm_crtc *crtc)
+{
+ /* disable crtc pair power gating before programming */
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
+ dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void dce_v6_0_crtc_commit(struct drm_crtc *crtc)
+{
+ dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
+}
+
+static void dce_v6_0_crtc_disable(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_atom_ss ss;
+ int i;
+
+ dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+ if (crtc->primary->fb) {
+ int r;
+ struct amdgpu_bo *abo;
+
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r)) {
+ DRM_ERROR("failed to reserve abo before unpin\n");
+ } else {
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+ }
+ /* disable the GRPH */
+ dce_v6_0_grph_enable(crtc, false);
+
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != amdgpu_crtc->crtc_id &&
+ amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* another crtc is using this pll, don't turn
+ * it off
+ */
+ goto done;
+ }
+ }
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL1:
+ case ATOM_PPLL2:
+ /* disable the ppll */
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ default:
+ break;
+ }
+done:
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+}
+
+static int dce_v6_0_crtc_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!amdgpu_crtc->adjusted_clock)
+ return -EINVAL;
+
+ amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
+ amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
+ dce_v6_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+ amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
+ amdgpu_atombios_crtc_scaler_setup(crtc);
+ dce_v6_0_cursor_reset(crtc);
+ /* update the hw mode for dpm */
+ amdgpu_crtc->hw_mode = *adjusted_mode;
+
+ return 0;
+}
+
+static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+
+ /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc == crtc) {
+ amdgpu_crtc->encoder = encoder;
+ amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
+ break;
+ }
+ }
+ if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ return false;
+ }
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ return false;
+ if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
+ return false;
+ /* pick pll */
+ amdgpu_crtc->pll_id = dce_v6_0_pick_pll(crtc);
+ /* if we can't get a PPLL for a non-DP encoder, fail */
+ if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
+ return false;
+
+ return true;
+}
+
+static int dce_v6_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)
+{
+ return dce_v6_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+}
+
+static int dce_v6_0_crtc_set_base_atomic(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, enum mode_set_atomic state)
+{
+ return dce_v6_0_crtc_do_set_base(crtc, fb, x, y, 1);
+}
+
+static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = {
+ .dpms = dce_v6_0_crtc_dpms,
+ .mode_fixup = dce_v6_0_crtc_mode_fixup,
+ .mode_set = dce_v6_0_crtc_mode_set,
+ .mode_set_base = dce_v6_0_crtc_set_base,
+ .mode_set_base_atomic = dce_v6_0_crtc_set_base_atomic,
+ .prepare = dce_v6_0_crtc_prepare,
+ .commit = dce_v6_0_crtc_commit,
+ .disable = dce_v6_0_crtc_disable,
+ .get_scanout_position = amdgpu_crtc_get_scanout_position,
+};
+
+static void dce_v6_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+}
+
+static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v6_0_panic_flush,
+};
+
+static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
+{
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
+ (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+ if (amdgpu_crtc == NULL)
+ return -ENOMEM;
+
+ drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v6_0_crtc_funcs);
+
+ drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
+ amdgpu_crtc->crtc_id = index;
+ adev->mode_info.crtcs[index] = amdgpu_crtc;
+
+ amdgpu_crtc->max_cursor_width = CURSOR_WIDTH;
+ amdgpu_crtc->max_cursor_height = CURSOR_HEIGHT;
+ adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
+ adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
+
+ amdgpu_crtc->crtc_offset = crtc_offsets[amdgpu_crtc->crtc_id];
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs);
+
+ return 0;
+}
+
+static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg;
+ adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg;
+
+ dce_v6_0_set_display_funcs(adev);
+
+ adev->mode_info.num_crtc = dce_v6_0_get_num_crtc(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 6;
+ break;
+ case CHIP_OLAND:
+ adev->mode_info.num_hpd = 2;
+ adev->mode_info.num_dig = 2;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ dce_v6_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ if (r)
+ return r;
+ }
+
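+ /*
+ * Page-flip interrupts for D1..D6 use every other legacy src_id
+ * starting at 8; dce_v6_0_pageflip_irq() below recovers the crtc
+ * index as (src_id - 8) >> 1.
+ */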
+ for (i = 8; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ if (r)
+ return r;
+ }
+
+ /* HPD (hotplug detect) interrupt */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ if (r)
+ return r;
+
+ adev->mode_info.mode_config_initialized = true;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ /* allocate crtcs */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = dce_v6_0_crtc_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
+ amdgpu_display_print_display_setup(adev_to_drm(adev));
+ else
+ return -EINVAL;
+
+ /* setup afmt */
+ r = dce_v6_0_afmt_init(adev);
+ if (r)
+ return r;
+
+ r = dce_v6_0_audio_init(adev);
+ if (r)
+ return r;
+
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ /* Pre-DCE11 */
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ return r;
+}
+
+static int dce_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+
+ dce_v6_0_audio_fini(adev);
+ dce_v6_0_afmt_fini(adev);
+
+ drm_mode_config_cleanup(adev_to_drm(adev));
+ adev->mode_info.mode_config_initialized = false;
+
+ return 0;
+}
+
+static int dce_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* disable vga render */
+ dce_v6_0_set_vga_render_state(adev, false);
+ /* init dig PHYs, disp eng pll */
+ amdgpu_atombios_encoder_init_dig(adev);
+ amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
+
+ /* initialize hpd */
+ dce_v6_0_hpd_init(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v6_0_pageflip_interrupt_init(adev);
+
+ return 0;
+}
+
+static int dce_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ dce_v6_0_hpd_fini(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v6_0_pageflip_interrupt_fini(adev);
+
+ flush_delayed_work(&adev->hotplug_work);
+
+ return 0;
+}
+
+static int dce_v6_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
+ return dce_v6_0_hw_fini(ip_block);
+}
+
+static int dce_v6_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
+ ret = dce_v6_0_hw_init(ip_block);
+
+ /* turn on the BL */
+ if (adev->mode_info.bl_encoder) {
+ u8 bl_level = amdgpu_display_backlight_get_level(adev,
+ adev->mode_info.bl_encoder);
+ amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
+ bl_level);
+ }
+ if (ret)
+ return ret;
+
+ return amdgpu_display_resume_helper(adev);
+}
+
+static bool dce_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (dce_v6_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg_block, interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (crtc) {
+ case 0:
+ reg_block = CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ reg_block = CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ reg_block = CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ reg_block = CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ reg_block = CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ reg_block = CRTC5_REGISTER_OFFSET;
+ break;
+ default:
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ interrupt_mask = RREG32(mmINT_MASK + reg_block);
+ interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK;
+ WREG32(mmINT_MASK + reg_block, interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ interrupt_mask = RREG32(mmINT_MASK + reg_block);
+ interrupt_mask |= INT_MASK__VBLANK_INT_MASK;
+ WREG32(mmINT_MASK + reg_block, interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+
+}
+
+static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned hpd,
+ enum amdgpu_interrupt_state state)
+{
+ u32 dc_hpd_int_cntl;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return 0;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CRTC_IRQ_VBLANK1:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK2:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK3:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK4:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK5:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK6:
+ dce_v6_0_set_crtc_vblank_interrupt_state(adev, 5, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE1:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE2:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE3:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE4:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE5:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE6:
+ dce_v6_0_set_crtc_vline_interrupt_state(adev, 5, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned crtc = entry->src_id - 1;
+ uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ crtc);
+
+ switch (entry->src_data[0]) {
+ case 0: /* vblank */
+ if (disp_int & interrupt_status_offsets[crtc].vblank)
+ WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ if (amdgpu_irq_enabled(adev, source, irq_type)) {
+ drm_handle_vblank(adev_to_drm(adev), crtc);
+ }
+ DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+ break;
+ case 1: /* vline */
+ if (disp_int & interrupt_status_offsets[crtc].vline)
+ WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+ break;
+ default:
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg;
+
+ if (type >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", type);
+ return -EINVAL;
+ }
+
+ reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
+ if (state == AMDGPU_IRQ_STATE_DISABLE)
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+ else
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+
+ return 0;
+}
+
+static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned long flags;
+ unsigned crtc_id;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_flip_work *works;
+
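+ /* GRPH pageflip src_ids start at 8 and are spaced two per CRTC,
+ * hence the (src_id - 8) >> 1 mapping below.
+ */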
+ crtc_id = (entry->src_id - 8) >> 1;
+
+ if (crtc_id >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
+ return -EINVAL;
+ }
+
+ amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+
+ if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
+ WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
+
+ /* The IRQ can fire before the CRTC has been fully set up */
+ if (amdgpu_crtc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ works = amdgpu_crtc->pflip_works;
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
+ "AMDGPU_FLIP_SUBMITTED(%d)\n",
+ amdgpu_crtc->pflip_status,
+ AMDGPU_FLIP_SUBMITTED);
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ return 0;
+ }
+
+ /* page flip completed. clean up */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
+ amdgpu_crtc->pflip_works = NULL;
+
+ /* wake up userspace */
+ if (works->event)
+ drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
+
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+
+ drm_crtc_vblank_put(&amdgpu_crtc->base);
+ schedule_work(&works->unpin_work);
+
+ return 0;
+}
+
+static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t disp_int, mask;
+ unsigned hpd;
+
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ return 0;
+ }
+
+ hpd = entry->src_data[0];
+ disp_int = RREG32(interrupt_status_offsets[hpd].reg);
+ mask = interrupt_status_offsets[hpd].hpd;
+
+ if (disp_int & mask) {
+ dce_v6_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
+ }
+
+ return 0;
+}
+
+static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
+ .name = "dce_v6_0",
+ .early_init = dce_v6_0_early_init,
+ .sw_init = dce_v6_0_sw_init,
+ .sw_fini = dce_v6_0_sw_fini,
+ .hw_init = dce_v6_0_hw_init,
+ .hw_fini = dce_v6_0_hw_fini,
+ .suspend = dce_v6_0_suspend,
+ .resume = dce_v6_0_resume,
+ .is_idle = dce_v6_0_is_idle,
+ .soft_reset = dce_v6_0_soft_reset,
+ .set_clockgating_state = dce_v6_0_set_clockgating_state,
+ .set_powergating_state = dce_v6_0_set_powergating_state,
+};
+
+static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ amdgpu_encoder->pixel_clock = adjusted_mode->clock;
+
+ /* need to call this here rather than in prepare() since we need some crtc info */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ /* setting the scaler clears this on some chips */
+ dce_v6_0_set_interleave(encoder->crtc, mode);
+
+ if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) {
+ dce_v6_0_afmt_enable(encoder, true);
+ dce_v6_0_afmt_setmode(encoder, adjusted_mode);
+ }
+}
+
+static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if ((amdgpu_encoder->active_device &
+ (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig) {
+ dig->dig_encoder = dce_v6_0_pick_dig_encoder(encoder);
+ if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
+ dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
+ }
+ }
+
+ amdgpu_atombios_scratch_regs_lock(adev, true);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* select the clock/data port if it uses a router */
+ if (amdgpu_connector->router.cd_valid)
+ amdgpu_i2c_router_select_cd_port(amdgpu_connector);
+
+ /* turn eDP panel on for mode set */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ }
+
+ /* this is needed for the pll/ss setup to work correctly in some cases */
+ amdgpu_atombios_encoder_set_crtc_source(encoder);
+ /* set up the FMT blocks */
+ dce_v6_0_program_fmt(encoder);
+}
+
+static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* need to call this here as we need the crtc set up */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_scratch_regs_lock(adev, false);
+}
+
+static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig;
+ int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
+
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ if (amdgpu_atombios_encoder_is_digital(encoder)) {
+ if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em))
+ dce_v6_0_afmt_enable(encoder, false);
+ dig = amdgpu_encoder->enc_priv;
+ dig->dig_encoder = -1;
+ }
+ amdgpu_encoder->active_device = 0;
+}
+
+/* these are handled by the primary encoders */
+static void dce_v6_0_ext_prepare(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v6_0_ext_commit(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+}
+
+static void dce_v6_0_ext_disable(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+
+}
+
+static bool dce_v6_0_ext_mode_fixup(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ return true;
+}
+
+static const struct drm_encoder_helper_funcs dce_v6_0_ext_helper_funcs = {
+ .dpms = dce_v6_0_ext_dpms,
+ .mode_fixup = dce_v6_0_ext_mode_fixup,
+ .prepare = dce_v6_0_ext_prepare,
+ .mode_set = dce_v6_0_ext_mode_set,
+ .commit = dce_v6_0_ext_commit,
+ .disable = dce_v6_0_ext_disable,
+ /* no detect for TMDS/LVDS yet */
+};
+
+static const struct drm_encoder_helper_funcs dce_v6_0_dig_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v6_0_encoder_prepare,
+ .mode_set = dce_v6_0_encoder_mode_set,
+ .commit = dce_v6_0_encoder_commit,
+ .disable = dce_v6_0_encoder_disable,
+ .detect = amdgpu_atombios_encoder_dig_detect,
+};
+
+static const struct drm_encoder_helper_funcs dce_v6_0_dac_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v6_0_encoder_prepare,
+ .mode_set = dce_v6_0_encoder_mode_set,
+ .commit = dce_v6_0_encoder_commit,
+ .detect = amdgpu_atombios_encoder_dac_detect,
+};
+
+static void dce_v6_0_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
+ kfree(amdgpu_encoder->enc_priv);
+ drm_encoder_cleanup(encoder);
+ kfree(amdgpu_encoder);
+}
+
+static const struct drm_encoder_funcs dce_v6_0_encoder_funcs = {
+ .destroy = dce_v6_0_encoder_destroy,
+};
+
+static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ /* see if we already added it */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->encoder_enum == encoder_enum) {
+ amdgpu_encoder->devices |= supported_device;
+ return;
+ }
+ }
+
+ /* add a new one */
+ amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
+ if (!amdgpu_encoder)
+ return;
+
+ encoder = &amdgpu_encoder->base;
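+ /* Let any CRTC the ASIC exposes drive this encoder; for each supported
+ * CRTC count the mask below is simply (1 << num_crtc) - 1.
+ */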
+ switch (adev->mode_info.num_crtc) {
+ case 1:
+ encoder->possible_crtcs = 0x1;
+ break;
+ case 2:
+ default:
+ encoder->possible_crtcs = 0x3;
+ break;
+ case 4:
+ encoder->possible_crtcs = 0xf;
+ break;
+ case 6:
+ encoder->possible_crtcs = 0x3f;
+ break;
+ }
+
+ amdgpu_encoder->enc_priv = NULL;
+ amdgpu_encoder->encoder_enum = encoder_enum;
+ amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ amdgpu_encoder->devices = supported_device;
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
+ amdgpu_encoder->is_ext_encoder = false;
+ amdgpu_encoder->caps = caps;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ drm_encoder_helper_add(encoder, &dce_v6_0_dac_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_encoder->rmx_type = RMX_FULL;
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ } else {
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ }
+ drm_encoder_helper_add(encoder, &dce_v6_0_dig_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_SI170B:
+ case ENCODER_OBJECT_ID_CH7303:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
+ case ENCODER_OBJECT_ID_TITFP513:
+ case ENCODER_OBJECT_ID_VT1623:
+ case ENCODER_OBJECT_ID_HDMI_SI1930:
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ /* these are handled by the primary encoders */
+ amdgpu_encoder->is_ext_encoder = true;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ else
+ drm_encoder_init(dev, encoder, &dce_v6_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ drm_encoder_helper_add(encoder, &dce_v6_0_ext_helper_funcs);
+ break;
+ }
+}
+
+static const struct amdgpu_display_funcs dce_v6_0_display_funcs = {
+ .bandwidth_update = &dce_v6_0_bandwidth_update,
+ .vblank_get_counter = &dce_v6_0_vblank_get_counter,
+ .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
+ .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
+ .hpd_sense = &dce_v6_0_hpd_sense,
+ .hpd_set_polarity = &dce_v6_0_hpd_set_polarity,
+ .hpd_get_gpio_reg = &dce_v6_0_hpd_get_gpio_reg,
+ .page_flip = &dce_v6_0_page_flip,
+ .page_flip_get_scanoutpos = &dce_v6_0_crtc_get_scanoutpos,
+ .add_encoder = &dce_v6_0_encoder_add,
+ .add_connector = &amdgpu_connector_add,
+};
+
+static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
+{
+ adev->mode_info.funcs = &dce_v6_0_display_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = {
+ .set = dce_v6_0_set_crtc_irq_state,
+ .process = dce_v6_0_crtc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = {
+ .set = dce_v6_0_set_pageflip_irq_state,
+ .process = dce_v6_0_pageflip_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = {
+ .set = dce_v6_0_set_hpd_irq_state,
+ .process = dce_v6_0_hpd_irq,
+};
+
+static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.num_crtc > 0)
+ adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
+ else
+ adev->crtc_irq.num_types = 0;
+ adev->crtc_irq.funcs = &dce_v6_0_crtc_irq_funcs;
+
+ adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
+ adev->pageflip_irq.funcs = &dce_v6_0_pageflip_irq_funcs;
+
+ adev->hpd_irq.num_types = adev->mode_info.num_hpd;
+ adev->hpd_irq.funcs = &dce_v6_0_hpd_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version dce_v6_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &dce_v6_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v6_4_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 6,
+ .minor = 4,
+ .rev = 0,
+ .funcs = &dce_v6_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.h
new file mode 100644
index 000000000000..7b546b596de1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCE_V6_0_H__
+#define __DCE_V6_0_H__
+
+extern const struct amdgpu_ip_block_version dce_v6_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v6_4_ip_block;
+
+void dce_v6_0_disable_dce(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
new file mode 100644
index 000000000000..2ccd6aad8dd6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -0,0 +1,3611 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_edid.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_vblank.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_i2c.h"
+#include "cikd.h"
+#include "atom.h"
+#include "amdgpu_atombios.h"
+#include "atombios_crtc.h"
+#include "atombios_encoders.h"
+#include "amdgpu_pll.h"
+#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
+#include "dce_v8_0.h"
+
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
+#include "gca/gfx_7_2_enum.h"
+
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
+
+#include "oss/oss_2_0_d.h"
+#include "oss/oss_2_0_sh_mask.h"
+
+static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev);
+static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static const u32 crtc_offsets[6] = {
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET
+};
+
+static const u32 hpd_offsets[] = {
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
+};
+
+static const uint32_t dig_offsets[] = {
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
+ (0x13830 - 0x7030) >> 2,
+};
+
+static const struct {
+ uint32_t reg;
+ uint32_t vblank;
+ uint32_t vline;
+ uint32_t hpd;
+
+} interrupt_status_offsets[6] = { {
+ .reg = mmDISP_INTERRUPT_STATUS,
+ .vblank = DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE2,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE3,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE4,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK
+}, {
+ .reg = mmDISP_INTERRUPT_STATUS_CONTINUE5,
+ .vblank = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK,
+ .vline = DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK,
+ .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK
+} };
+
+static u32 dce_v8_0_audio_endpt_rreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+
+ return r;
+}
+
+static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev,
+ u32 block_offset, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+ WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+ spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
+}
+
+static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
+{
+ if (crtc >= adev->mode_info.num_crtc)
+ return 0;
+ else
+ return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]);
+}
+
+static void dce_v8_0_pageflip_interrupt_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Enable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_get(adev, &adev->pageflip_irq, i);
+}
+
+static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ /* Disable pflip interrupts */
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ amdgpu_irq_put(adev, &adev->pageflip_irq, i);
+}
+
+/**
+ * dce_v8_0_page_flip - pageflip callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @crtc_id: crtc to pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ * @async: asynchronous flip
+ *
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
+ */
+static void dce_v8_0_page_flip(struct amdgpu_device *adev,
+ int crtc_id, u64 crtc_base, bool async)
+{
+ struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+ struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;
+
+ /* flip at hsync for async, default is vsync */
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ?
+ GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0);
+ /* update pitch */
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset,
+ fb->pitches[0] / fb->format->cpp[0]);
+ /* update the primary scanout addresses */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(crtc_base));
+ /* post the write */
+ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
+}
+
+static int dce_v8_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
+ u32 *vbl, u32 *position)
+{
+ if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
+ return -EINVAL;
+
+ *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
+ *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
+
+ return 0;
+}
+
+/**
+ * dce_v8_0_hpd_sense - hpd sense callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (evergreen+).
+ * Returns true if connected, false if not connected.
+ */
+static bool dce_v8_0_hpd_sense(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ bool connected = false;
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return connected;
+
+ if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+ connected = true;
+
+ return connected;
+}
+
+/**
+ * dce_v8_0_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @adev: amdgpu_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (evergreen+).
+ */
+static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev,
+ enum amdgpu_hpd_id hpd)
+{
+ u32 tmp;
+ bool connected = dce_v8_0_hpd_sense(adev, hpd);
+
+ if (hpd >= adev->mode_info.num_hpd)
+ return;
+
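+ /* The polarity is programmed opposite to the current sense, presumably
+ * so that the next plug/unplug transition raises a fresh hpd interrupt.
+ */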
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ if (connected)
+ tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+ else
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_POLARITY_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev,
+ int hpd)
+{
+ u32 tmp;
+
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
+ return;
+ }
+
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
+}
+
+/**
+ * dce_v8_0_hpd_init - hpd setup callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the hpd pins used by the card (evergreen+).
+ * Enable the pin, set the polarity, and enable the hpd interrupts.
+ */
+static void dce_v8_0_hpd_init(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp |= DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP ||
+ connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+ /* don't try to enable hpd on eDP or LVDS: this avoids breaking the
+ * aux dp channel on imac and helps (but does not completely fix)
+ * https://bugzilla.redhat.com/show_bug.cgi?id=726143
+ * and also avoids interrupt storms during dpms.
+ */
+ tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+ continue;
+ }
+
+ dce_v8_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd);
+ dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd);
+ amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+/**
+ * dce_v8_0_hpd_fini - hpd tear down callback.
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tear down the hpd pins used by the card (evergreen+).
+ * Disable the hpd interrupts.
+ */
+static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ u32 tmp;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ if (amdgpu_connector->hpd.hpd >= adev->mode_info.num_hpd)
+ continue;
+
+ tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
+ tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
+
+ amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
+ }
+ drm_connector_list_iter_end(&iter);
+}
+
+static u32 dce_v8_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
+{
+ return mmDC_GPIO_HPD_A;
+}
+
+static bool dce_v8_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
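+ /* Snapshot the HV counter of every enabled CRTC, then poll up to ten
+ * times at 100 us intervals; a CRTC whose counter never advances in
+ * that window is considered hung.
+ */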
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
+static void dce_v8_0_set_vga_render_state(struct amdgpu_device *adev,
+ bool render)
+{
+ u32 tmp;
+
+ /* Lockout access through VGA aperture*/
+ tmp = RREG32(mmVGA_HDP_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
+ WREG32(mmVGA_HDP_CONTROL, tmp);
+
+ /* disable VGA render */
+ tmp = RREG32(mmVGA_RENDER_CONTROL);
+ if (render)
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
+ WREG32(mmVGA_RENDER_CONTROL, tmp);
+}
+
+static int dce_v8_0_get_num_crtc(struct amdgpu_device *adev)
+{
+ int num_crtc = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ num_crtc = 6;
+ break;
+ case CHIP_KAVERI:
+ num_crtc = 4;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ num_crtc = 2;
+ break;
+ default:
+ num_crtc = 0;
+ }
+ return num_crtc;
+}
+
+void dce_v8_0_disable_dce(struct amdgpu_device *adev)
+{
+ /* Disable VGA render and any enabled CRTCs, if the ASIC has a DCE engine */
+ if (amdgpu_atombios_has_dce_engine_info(adev)) {
+ u32 tmp;
+ int crtc_enabled, i;
+
+ dce_v8_0_set_vga_render_state(adev, false);
+
+ /* Disable the CRTCs */
+ for (i = 0; i < dce_v8_0_get_num_crtc(adev); i++) {
+ crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
+ CRTC_CONTROL, CRTC_MASTER_EN);
+ if (crtc_enabled) {
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+ tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]);
+ tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0);
+ WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp);
+ WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
+ }
+ }
+ }
+}
+
+static void dce_v8_0_program_fmt(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ int bpc = 0;
+ u32 tmp = 0;
+ enum amdgpu_connector_dither dither = AMDGPU_FMT_DITHER_DISABLE;
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+ bpc = amdgpu_connector_get_monitor_bpc(connector);
+ dither = amdgpu_connector->dither;
+ }
+
+ /* LVDS/eDP FMT is set up by atom */
+ if (amdgpu_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
+ return;
+
+ /* not needed for analog */
+ if ((amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
+ (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
+ return;
+
+ if (bpc == 0)
+ return;
+
+ switch (bpc) {
+ case 6:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK |
+ (0 << FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_DEPTH__SHIFT));
+ else
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK |
+ (0 << FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_DEPTH__SHIFT));
+ break;
+ case 8:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_RGB_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK |
+ (1 << FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_DEPTH__SHIFT));
+ else
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK |
+ (1 << FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_DEPTH__SHIFT));
+ break;
+ case 10:
+ if (dither == AMDGPU_FMT_DITHER_ENABLE)
+ /* XXX sort out optimal dither settings */
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_FRAME_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_HIGHPASS_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_RGB_RANDOM_ENABLE_MASK |
+ FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_EN_MASK |
+ (2 << FMT_BIT_DEPTH_CONTROL__FMT_SPATIAL_DITHER_DEPTH__SHIFT));
+ else
+ tmp |= (FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_EN_MASK |
+ (2 << FMT_BIT_DEPTH_CONTROL__FMT_TRUNCATE_DEPTH__SHIFT));
+ break;
+ default:
+ /* not needed */
+ break;
+ }
+
+ WREG32(mmFMT_BIT_DEPTH_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+}
+
+/* display watermark setup */
+/**
+ * dce_v8_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ *
+ * Set up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
+static u32 dce_v8_0_line_buffer_adjust(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ struct drm_display_mode *mode)
+{
+ u32 tmp, buffer_alloc, i;
+ u32 pipe_offset = amdgpu_crtc->crtc_id * 0x8;
+ /*
+ * Line Buffer Setup
+ * There are 6 line buffers, one for each display controller.
+ * There are 3 partitions per LB. Select the number of partitions
+ * to enable based on the display width. For display widths larger
+ * than 4096, you need to use 2 display controllers and combine
+ * them using the stereo blender.
+ */
+ if (amdgpu_crtc->base.enabled && mode) {
+ if (mode->crtc_hdisplay < 1920) {
+ tmp = 1;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 2560) {
+ tmp = 2;
+ buffer_alloc = 2;
+ } else if (mode->crtc_hdisplay < 4096) {
+ tmp = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ } else {
+ DRM_DEBUG_KMS("Mode too big for LB!\n");
+ tmp = 0;
+ buffer_alloc = (adev->flags & AMD_IS_APU) ? 2 : 4;
+ }
+ } else {
+ tmp = 1;
+ buffer_alloc = 0;
+ }
+
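+ /* tmp selects the LB_MEMORY_CONFIG partitioning; the usable width that
+ * goes with it (two lines of 1920, 2560 or 4096 pixels) is what gets
+ * returned at the end of this function.
+ */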
+ WREG32(mmLB_MEMORY_CTRL + amdgpu_crtc->crtc_offset,
+ (tmp << LB_MEMORY_CTRL__LB_MEMORY_CONFIG__SHIFT) |
+ (0x6B0 << LB_MEMORY_CTRL__LB_MEMORY_SIZE__SHIFT));
+
+ WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+ (buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+ PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATION_COMPLETED_MASK)
+ break;
+ udelay(1);
+ }
+
+ if (amdgpu_crtc->base.enabled && mode) {
+ switch (tmp) {
+ case 0:
+ default:
+ return 4096 * 2;
+ case 1:
+ return 1920 * 2;
+ case 2:
+ return 2560 * 2;
+ }
+ }
+
+ /* controller not enabled, so no lb used */
+ return 0;
+}
+
+/**
+ * cik_get_number_of_dram_channels - get the number of dram channels
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the number of video ram channels (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the number of dram channels
+ */
+static u32 cik_get_number_of_dram_channels(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32(mmMC_SHARED_CHMAP);
+
+ switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) {
+ case 0:
+ default:
+ return 1;
+ case 1:
+ return 2;
+ case 2:
+ return 4;
+ case 3:
+ return 8;
+ case 4:
+ return 3;
+ case 5:
+ return 6;
+ case 6:
+ return 10;
+ case 7:
+ return 12;
+ case 8:
+ return 16;
+ }
+}
+
+struct dce8_wm_params {
+ u32 dram_channels; /* number of dram channels */
+ u32 yclk; /* bandwidth per dram data pin in kHz */
+ u32 sclk; /* engine clock in kHz */
+ u32 disp_clk; /* display clock in kHz */
+ u32 src_width; /* viewport width */
+ u32 active_time; /* active display time in ns */
+ u32 blank_time; /* blank time in ns */
+ bool interlaced; /* mode is interlaced */
+ fixed20_12 vsc; /* vertical scale ratio */
+ u32 num_heads; /* number of active crtcs */
+ u32 bytes_per_pixel; /* bytes per pixel display + overlay */
+ u32 lb_size; /* line buffer allocated to pipe */
+ u32 vtaps; /* vertical scaler taps */
+};
+
+/**
+ * dce_v8_0_dram_bandwidth - get the dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the raw dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_dram_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate raw DRAM Bandwidth */
+ fixed20_12 dram_efficiency; /* 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
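+ /* In effect: bandwidth [MB/s] ~= (yclk / 1000) * dram_channels * 4 * 0.7,
+ * computed with the fixed20_12 fixed-point helpers to stay in integer math.
+ */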
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ dram_efficiency.full = dfixed_const(7);
+ dram_efficiency.full = dfixed_div(dram_efficiency, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_dram_bandwidth_for_display - get the dram bandwidth for display
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dram bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dram bandwidth for display in MBytes/s
+ */
+static u32 dce_v8_0_dram_bandwidth_for_display(struct dce8_wm_params *wm)
+{
+ /* Calculate DRAM Bandwidth and the part allocated to display. */
+ fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
+ fixed20_12 yclk, dram_channels, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ yclk.full = dfixed_const(wm->yclk);
+ yclk.full = dfixed_div(yclk, a);
+ dram_channels.full = dfixed_const(wm->dram_channels * 4);
+ a.full = dfixed_const(10);
+ disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
+ disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
+ bandwidth.full = dfixed_mul(dram_channels, yclk);
+ bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_data_return_bandwidth - get the data return bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the data return bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the data return bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_data_return_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate the display Data return Bandwidth */
+ fixed20_12 return_efficiency; /* 0.8 */
+ fixed20_12 sclk, bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ sclk.full = dfixed_const(wm->sclk);
+ sclk.full = dfixed_div(sclk, a);
+ a.full = dfixed_const(10);
+ return_efficiency.full = dfixed_const(8);
+ return_efficiency.full = dfixed_div(return_efficiency, a);
+ a.full = dfixed_const(32);
+ bandwidth.full = dfixed_mul(a, sclk);
+ bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_dmif_request_bandwidth - get the dmif bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the dmif bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the dmif bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_dmif_request_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate the DMIF Request Bandwidth */
+ fixed20_12 disp_clk_request_efficiency; /* 0.8 */
+ fixed20_12 disp_clk, bandwidth;
+ fixed20_12 a, b;
+
+ a.full = dfixed_const(1000);
+ disp_clk.full = dfixed_const(wm->disp_clk);
+ disp_clk.full = dfixed_div(disp_clk, a);
+ a.full = dfixed_const(32);
+ b.full = dfixed_mul(a, disp_clk);
+
+ a.full = dfixed_const(10);
+ disp_clk_request_efficiency.full = dfixed_const(8);
+ disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
+
+ bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_available_bandwidth - get the min available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the min available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the min available bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_available_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate the available bandwidth. The display can use this much temporarily, but not on average. */
+ u32 dram_bandwidth = dce_v8_0_dram_bandwidth(wm);
+ u32 data_return_bandwidth = dce_v8_0_data_return_bandwidth(wm);
+ u32 dmif_req_bandwidth = dce_v8_0_dmif_request_bandwidth(wm);
+
+ return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
+}
+
+/**
+ * dce_v8_0_average_bandwidth - get the average available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the average available bandwidth used for display (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the average available bandwidth in MBytes/s
+ */
+static u32 dce_v8_0_average_bandwidth(struct dce8_wm_params *wm)
+{
+ /* Calculate the display mode Average Bandwidth
+ * DisplayMode should contain the source and destination dimensions,
+ * timing, etc.
+ */
+ fixed20_12 bpp;
+ fixed20_12 line_time;
+ fixed20_12 src_width;
+ fixed20_12 bandwidth;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1000);
+ line_time.full = dfixed_const(wm->active_time + wm->blank_time);
+ line_time.full = dfixed_div(line_time, a);
+ bpp.full = dfixed_const(wm->bytes_per_pixel);
+ src_width.full = dfixed_const(wm->src_width);
+ bandwidth.full = dfixed_mul(src_width, bpp);
+ bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
+ bandwidth.full = dfixed_div(bandwidth, line_time);
+
+ return dfixed_trunc(bandwidth);
+}
+
+/**
+ * dce_v8_0_latency_watermark - get the latency watermark
+ *
+ * @wm: watermark calculation data
+ *
+ * Calculate the latency watermark (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns the latency watermark in ns
+ */
+static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm)
+{
+ /* First calculate the latency in ns */
+ u32 mc_latency = 2000; /* 2000 ns. */
+ u32 available_bandwidth = dce_v8_0_available_bandwidth(wm);
+ u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
+ u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
+ u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
+ u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
+ (wm->num_heads * cursor_line_pair_return_time);
+ u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
+ u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
+ u32 tmp, dmif_size = 12288;
+ fixed20_12 a, b, c;
+
+ if (wm->num_heads == 0)
+ return 0;
+
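+ /* With more than 2x vertical downscaling, 3+ taps while downscaling,
+ * 5+ taps, or 2x (or more) scaling on an interlaced mode, up to four
+ * source lines feed each destination line; otherwise two are enough.
+ */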
+ a.full = dfixed_const(2);
+ b.full = dfixed_const(1);
+ if ((wm->vsc.full > a.full) ||
+ ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
+ (wm->vtaps >= 5) ||
+ ((wm->vsc.full >= a.full) && wm->interlaced))
+ max_src_lines_per_dst_line = 4;
+ else
+ max_src_lines_per_dst_line = 2;
+
+ a.full = dfixed_const(available_bandwidth);
+ b.full = dfixed_const(wm->num_heads);
+ a.full = dfixed_div(a, b);
+ tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+ tmp = min(dfixed_trunc(a), tmp);
+
+ lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
+
+ a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
+ b.full = dfixed_const(1000);
+ c.full = dfixed_const(lb_fill_bw);
+ b.full = dfixed_div(c, b);
+ a.full = dfixed_div(a, b);
+ line_fill_time = dfixed_trunc(a);
+
+ if (line_fill_time < wm->active_time)
+ return latency;
+ else
+ return latency + (line_fill_time - wm->active_time);
+
+}
+
+/**
+ * dce_v8_0_average_bandwidth_vs_dram_bandwidth_for_display - check
+ * average and available dram bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * dram bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v8_0_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
+{
+ if (dce_v8_0_average_bandwidth(wm) <=
+ (dce_v8_0_dram_bandwidth_for_display(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v8_0_average_bandwidth_vs_available_bandwidth - check
+ * average and available bandwidth
+ *
+ * @wm: watermark calculation data
+ *
+ * Check if the display average bandwidth fits in the display
+ * available bandwidth (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v8_0_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
+{
+ if (dce_v8_0_average_bandwidth(wm) <=
+ (dce_v8_0_available_bandwidth(wm) / wm->num_heads))
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v8_0_check_latency_hiding - check latency hiding
+ *
+ * @wm: watermark calculation data
+ *
+ * Check latency hiding (CIK).
+ * Used for display watermark bandwidth calculations
+ * Returns true if the display fits, false if not.
+ */
+static bool dce_v8_0_check_latency_hiding(struct dce8_wm_params *wm)
+{
+ u32 lb_partitions = wm->lb_size / wm->src_width;
+ u32 line_time = wm->active_time + wm->blank_time;
+ u32 latency_tolerant_lines;
+ u32 latency_hiding;
+ fixed20_12 a;
+
+ a.full = dfixed_const(1);
+ if (wm->vsc.full > a.full)
+ latency_tolerant_lines = 1;
+ else {
+ if (lb_partitions <= (wm->vtaps + 1))
+ latency_tolerant_lines = 1;
+ else
+ latency_tolerant_lines = 2;
+ }
+
+ latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
+
+ if (dce_v8_0_latency_watermark(wm) <= latency_hiding)
+ return true;
+ else
+ return false;
+}
+
+/**
+ * dce_v8_0_program_watermarks - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @lb_size: line buffer size
+ * @num_heads: number of display controllers in use
+ *
+ * Calculate and program the display watermarks for the
+ * selected display controller (CIK).
+ */
+static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
+ struct amdgpu_crtc *amdgpu_crtc,
+ u32 lb_size, u32 num_heads)
+{
+ struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
+ struct dce8_wm_params wm_low, wm_high;
+ u32 active_time;
+ u32 line_time = 0;
+ u32 latency_watermark_a = 0, latency_watermark_b = 0;
+ u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
+
+ if (amdgpu_crtc->base.enabled && num_heads && mode) {
+ active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
+ (u32)mode->clock);
+ line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
+ (u32)mode->clock);
+ line_time = min_t(u32, line_time, 65535);
+
+ /* watermark for high clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_high.yclk =
+ amdgpu_dpm_get_mclk(adev, false) * 10;
+ wm_high.sclk =
+ amdgpu_dpm_get_sclk(adev, false) * 10;
+ } else {
+ wm_high.yclk = adev->pm.current_mclk * 10;
+ wm_high.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_high.disp_clk = mode->clock;
+ wm_high.src_width = mode->crtc_hdisplay;
+ wm_high.active_time = active_time;
+ wm_high.blank_time = line_time - wm_high.active_time;
+ wm_high.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_high.interlaced = true;
+ wm_high.vsc = amdgpu_crtc->vsc;
+ wm_high.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_high.vtaps = 2;
+ wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_high.lb_size = lb_size;
+ wm_high.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_high.num_heads = num_heads;
+
+ /* set for high clocks */
+ latency_watermark_a = min_t(u32, dce_v8_0_latency_watermark(&wm_high), 65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v8_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+ !dce_v8_0_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+ !dce_v8_0_check_latency_hiding(&wm_high) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+
+ /* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
+ wm_low.yclk =
+ amdgpu_dpm_get_mclk(adev, true) * 10;
+ wm_low.sclk =
+ amdgpu_dpm_get_sclk(adev, true) * 10;
+ } else {
+ wm_low.yclk = adev->pm.current_mclk * 10;
+ wm_low.sclk = adev->pm.current_sclk * 10;
+ }
+
+ wm_low.disp_clk = mode->clock;
+ wm_low.src_width = mode->crtc_hdisplay;
+ wm_low.active_time = active_time;
+ wm_low.blank_time = line_time - wm_low.active_time;
+ wm_low.interlaced = false;
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ wm_low.interlaced = true;
+ wm_low.vsc = amdgpu_crtc->vsc;
+ wm_low.vtaps = 1;
+ if (amdgpu_crtc->rmx_type != RMX_OFF)
+ wm_low.vtaps = 2;
+ wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+ wm_low.lb_size = lb_size;
+ wm_low.dram_channels = cik_get_number_of_dram_channels(adev);
+ wm_low.num_heads = num_heads;
+
+ /* set for low clocks */
+ latency_watermark_b = min_t(u32, dce_v8_0_latency_watermark(&wm_low), 65535);
+
+ /* possibly force display priority to high */
+ /* should really do this at mode validation time... */
+ if (!dce_v8_0_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+ !dce_v8_0_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+ !dce_v8_0_check_latency_hiding(&wm_low) ||
+ (adev->mode_info.disp_priority == 2)) {
+ DRM_DEBUG_KMS("force priority to high\n");
+ }
+ lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
+ }
+
+ /* select wm A */
+ wm_mask = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp = wm_mask;
+ tmp &= ~(3 << DPG_WATERMARK_MASK_CONTROL__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (1 << DPG_WATERMARK_MASK_CONTROL__URGENCY_WATERMARK_MASK__SHIFT);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
+ ((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
+ (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
+ /* select wm B */
+ tmp = RREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset);
+ tmp &= ~(3 << DPG_WATERMARK_MASK_CONTROL__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (2 << DPG_WATERMARK_MASK_CONTROL__URGENCY_WATERMARK_MASK__SHIFT);
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, tmp);
+ WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
+ ((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
+ (line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
+ /* restore original selection */
+ WREG32(mmDPG_WATERMARK_MASK_CONTROL + amdgpu_crtc->crtc_offset, wm_mask);
+
+ /* save values for DPM */
+ amdgpu_crtc->line_time = line_time;
+
+ /* Save number of lines the linebuffer leads before the scanout */
+ amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
+}
+
+/**
+ * dce_v8_0_bandwidth_update - program display watermarks
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Calculate and program the display watermarks and line
+ * buffer allocation (CIK).
+ */
+static void dce_v8_0_bandwidth_update(struct amdgpu_device *adev)
+{
+ struct drm_display_mode *mode = NULL;
+ u32 num_heads = 0, lb_size;
+ int i;
+
+ amdgpu_display_update_priority(adev);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i]->base.enabled)
+ num_heads++;
+ }
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ mode = &adev->mode_info.crtcs[i]->base.mode;
+ lb_size = dce_v8_0_line_buffer_adjust(adev, adev->mode_info.crtcs[i], mode);
+ dce_v8_0_program_watermarks(adev, adev->mode_info.crtcs[i],
+ lb_size, num_heads);
+ }
+}
+
+static void dce_v8_0_audio_get_connected_pins(struct amdgpu_device *adev)
+{
+ int i;
+ u32 offset, tmp;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ offset = adev->mode_info.audio.pin[i].offset;
+ tmp = RREG32_AUDIO_ENDPT(offset,
+ ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
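+ /* A port connectivity value of 1 means "no physical connection" in the
+ * HD Audio pin configuration default, so treat that pin as disconnected.
+ */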
+ if (((tmp &
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY_MASK) >>
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT__PORT_CONNECTIVITY__SHIFT) == 1)
+ adev->mode_info.audio.pin[i].connected = false;
+ else
+ adev->mode_info.audio.pin[i].connected = true;
+ }
+}
+
+static struct amdgpu_audio_pin *dce_v8_0_audio_get_pin(struct amdgpu_device *adev)
+{
+ int i;
+
+ dce_v8_0_audio_get_connected_pins(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ if (adev->mode_info.audio.pin[i].connected)
+ return &adev->mode_info.audio.pin[i];
+ }
+ DRM_ERROR("No connected audio pins found!\n");
+ return NULL;
+}
+
+static void dce_v8_0_afmt_audio_select_pin(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->offset;
+
+ WREG32(mmAFMT_AUDIO_SRC_CONTROL + offset,
+ (dig->afmt->pin->id << AFMT_AUDIO_SRC_CONTROL__AFMT_AUDIO_SRC_SELECT__SHIFT));
+}
+
+static void dce_v8_0_audio_write_latency_fields(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 tmp = 0, offset;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+ if (connector->latency_present[1])
+ tmp =
+ (connector->video_latency[1] <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__VIDEO_LIPSYNC__SHIFT) |
+ (connector->audio_latency[1] <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__AUDIO_LIPSYNC__SHIFT);
+ else
+ tmp =
+ (0 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__VIDEO_LIPSYNC__SHIFT) |
+ (0 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__AUDIO_LIPSYNC__SHIFT);
+ } else {
+ if (connector->latency_present[0])
+ tmp =
+ (connector->video_latency[0] <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__VIDEO_LIPSYNC__SHIFT) |
+ (connector->audio_latency[0] <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__AUDIO_LIPSYNC__SHIFT);
+ else
+ tmp =
+ (0 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__VIDEO_LIPSYNC__SHIFT) |
+ (0 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC__AUDIO_LIPSYNC__SHIFT);
+
+ }
+ WREG32_AUDIO_ENDPT(offset, ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp);
+}
+
+static void dce_v8_0_audio_write_speaker_allocation(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ u32 offset, tmp;
+ u8 *sadb = NULL;
+ int sad_count;
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_speaker_allocation(amdgpu_connector->edid, &sadb);
+ if (sad_count < 0) {
+ DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
+ }
+
+ /* program the speaker allocation */
+ tmp = RREG32_AUDIO_ENDPT(offset, ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+ tmp &= ~(AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__DP_CONNECTION_MASK |
+ AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__SPEAKER_ALLOCATION_MASK);
+ /* set HDMI mode */
+ tmp |= AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__HDMI_CONNECTION_MASK;
+ if (sad_count)
+ tmp |= (sadb[0] << AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__SPEAKER_ALLOCATION__SHIFT);
+ else
+ tmp |= (5 << AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER__SPEAKER_ALLOCATION__SHIFT); /* stereo */
+ WREG32_AUDIO_ENDPT(offset, ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+ kfree(sadb);
+}
+
+static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter iter;
+ struct amdgpu_connector *amdgpu_connector = NULL;
+ struct cea_sad *sads;
+ int i, sad_count;
+
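+ /* map each audio descriptor register to the HDMI audio coding type it carries */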
+ static const u16 eld_reg_to_type[][2] = {
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+ { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+ };
+
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
+ drm_connector_list_iter_begin(dev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ if (connector->encoder == encoder) {
+ amdgpu_connector = to_amdgpu_connector(connector);
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (!amdgpu_connector) {
+ DRM_ERROR("Couldn't find encoder's connector\n");
+ return;
+ }
+
+ sad_count = drm_edid_to_sad(amdgpu_connector->edid, &sads);
+ if (sad_count < 0)
+ DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+ if (sad_count <= 0)
+ return;
+ BUG_ON(!sads);
+
+ for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+ u32 value = 0;
+ u8 stereo_freqs = 0;
+ int max_channels = -1;
+ int j;
+
+ for (j = 0; j < sad_count; j++) {
+ struct cea_sad *sad = &sads[j];
+
+ if (sad->format == eld_reg_to_type[i][1]) {
+ if (sad->channels > max_channels) {
+ value = (sad->channels <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
+ (sad->byte2 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
+ (sad->freq <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
+ max_channels = sad->channels;
+ }
+
+ if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+ stereo_freqs |= sad->freq;
+ else
+ break;
+ }
+ }
+
+ value |= (stereo_freqs <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT);
+
+ WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value);
+ }
+
+ kfree(sads);
+}
+
+static void dce_v8_0_audio_enable(struct amdgpu_device *adev,
+ struct amdgpu_audio_pin *pin,
+ bool enable)
+{
+ if (!pin)
+ return;
+
+ WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
+ enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0);
+}
+
+static const u32 pin_offsets[7] = {
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
+};
+
+static int dce_v8_0_audio_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ if (!amdgpu_audio)
+ return 0;
+
+ adev->mode_info.audio.enabled = true;
+
+ if (adev->asic_type == CHIP_KAVERI) /* KV: 4 streams, 7 endpoints */
+ adev->mode_info.audio.num_pins = 7;
+ else if ((adev->asic_type == CHIP_KABINI) ||
+ (adev->asic_type == CHIP_MULLINS)) /* KB/ML: 2 streams, 3 endpoints */
+ adev->mode_info.audio.num_pins = 3;
+ else if ((adev->asic_type == CHIP_BONAIRE) ||
+ (adev->asic_type == CHIP_HAWAII))/* BN/HW: 6 streams, 7 endpoints */
+ adev->mode_info.audio.num_pins = 7;
+ else
+ adev->mode_info.audio.num_pins = 3;
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ adev->mode_info.audio.pin[i].channels = -1;
+ adev->mode_info.audio.pin[i].rate = -1;
+ adev->mode_info.audio.pin[i].bits_per_sample = -1;
+ adev->mode_info.audio.pin[i].status_bits = 0;
+ adev->mode_info.audio.pin[i].category_code = 0;
+ adev->mode_info.audio.pin[i].connected = false;
+ adev->mode_info.audio.pin[i].offset = pin_offsets[i];
+ adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
+ dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ return 0;
+}
+
+static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_audio)
+ return;
+
+ if (!adev->mode_info.audio.enabled)
+ return;
+
+ adev->mode_info.audio.enabled = false;
+}
+
+/*
+ * update the N and CTS parameters for a given pixel clock rate
+ */
+static void dce_v8_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ uint32_t offset = dig->afmt->offset;
+
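+ /* program the N/CTS pairs computed by amdgpu_afmt_acr() for 32, 44.1 and 48 kHz audio */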
+ WREG32(mmHDMI_ACR_32_0 + offset, (acr.cts_32khz << HDMI_ACR_32_0__HDMI_ACR_CTS_32__SHIFT));
+ WREG32(mmHDMI_ACR_32_1 + offset, acr.n_32khz);
+
+ WREG32(mmHDMI_ACR_44_0 + offset, (acr.cts_44_1khz << HDMI_ACR_44_0__HDMI_ACR_CTS_44__SHIFT));
+ WREG32(mmHDMI_ACR_44_1 + offset, acr.n_44_1khz);
+
+ WREG32(mmHDMI_ACR_48_0 + offset, (acr.cts_48khz << HDMI_ACR_48_0__HDMI_ACR_CTS_48__SHIFT));
+ WREG32(mmHDMI_ACR_48_1 + offset, acr.n_48khz);
+}
+
+/*
+ * build a HDMI Video Info Frame
+ */
+static void dce_v8_0_afmt_update_avi_infoframe(struct drm_encoder *encoder,
+ void *buffer, size_t size)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ uint32_t offset = dig->afmt->offset;
+ uint8_t *frame = buffer + 3;
+ uint8_t *header = buffer;
+
+ WREG32(mmAFMT_AVI_INFO0 + offset,
+ frame[0x0] | (frame[0x1] << 8) | (frame[0x2] << 16) | (frame[0x3] << 24));
+ WREG32(mmAFMT_AVI_INFO1 + offset,
+ frame[0x4] | (frame[0x5] << 8) | (frame[0x6] << 16) | (frame[0x7] << 24));
+ WREG32(mmAFMT_AVI_INFO2 + offset,
+ frame[0x8] | (frame[0x9] << 8) | (frame[0xA] << 16) | (frame[0xB] << 24));
+ WREG32(mmAFMT_AVI_INFO3 + offset,
+ frame[0xC] | (frame[0xD] << 8) | (header[1] << 24));
+}
+
+static void dce_v8_0_audio_set_dto(struct drm_encoder *encoder, u32 clock)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ u32 dto_phase = 24 * 1000;
+ u32 dto_modulo = clock;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* XXX two dtos; generally use dto0 for hdmi */
+ /* Express [24MHz / target pixel clock] as an exact rational
+ * number (a ratio of two integers): DCCG_AUDIO_DTOx_PHASE is
+ * the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator.
+ */
+ WREG32(mmDCCG_AUDIO_DTO_SOURCE, (amdgpu_crtc->crtc_id << DCCG_AUDIO_DTO_SOURCE__DCCG_AUDIO_DTO0_SOURCE_SEL__SHIFT));
+ WREG32(mmDCCG_AUDIO_DTO0_PHASE, dto_phase);
+ WREG32(mmDCCG_AUDIO_DTO0_MODULE, dto_modulo);
+}
+
+/*
+ * update the info frames with the data from the current display mode
+ */
+static void dce_v8_0_afmt_setmode(struct drm_encoder *encoder,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+ u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
+ struct hdmi_avi_infoframe frame;
+ uint32_t offset, val;
+ ssize_t err;
+ int bpc = 8;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (!dig->afmt->enabled)
+ return;
+
+ offset = dig->afmt->offset;
+
+ /* hdmi deep color mode general control packets setup, if bpc > 8 */
+ if (encoder->crtc) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc);
+ bpc = amdgpu_crtc->bpc;
+ }
+
+ /* disable audio prior to setting up hw */
+ dig->afmt->pin = dce_v8_0_audio_get_pin(adev);
+ dce_v8_0_audio_enable(adev, dig->afmt->pin, false);
+
+ dce_v8_0_audio_set_dto(encoder, mode->clock);
+
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + offset,
+ HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK); /* send null packets when required */
+
+ WREG32(mmAFMT_AUDIO_CRC_CONTROL + offset, 0x1000);
+
+ val = RREG32(mmHDMI_CONTROL + offset);
+ val &= ~HDMI_CONTROL__HDMI_DEEP_COLOR_ENABLE_MASK;
+ val &= ~HDMI_CONTROL__HDMI_DEEP_COLOR_DEPTH_MASK;
+
+ switch (bpc) {
+ case 0:
+ case 6:
+ case 8:
+ case 16:
+ default:
+ DRM_DEBUG("%s: Disabling hdmi deep color for %d bpc.\n",
+ connector->name, bpc);
+ break;
+ case 10:
+ val |= HDMI_CONTROL__HDMI_DEEP_COLOR_ENABLE_MASK;
+ val |= 1 << HDMI_CONTROL__HDMI_DEEP_COLOR_DEPTH__SHIFT;
+ DRM_DEBUG("%s: Enabling hdmi deep color 30 for 10 bpc.\n",
+ connector->name);
+ break;
+ case 12:
+ val |= HDMI_CONTROL__HDMI_DEEP_COLOR_ENABLE_MASK;
+ val |= 2 << HDMI_CONTROL__HDMI_DEEP_COLOR_DEPTH__SHIFT;
+ DRM_DEBUG("%s: Enabling hdmi deep color 36 for 12 bpc.\n",
+ connector->name);
+ break;
+ }
+
+ WREG32(mmHDMI_CONTROL + offset, val);
+
+ WREG32(mmHDMI_VBI_PACKET_CONTROL + offset,
+ HDMI_VBI_PACKET_CONTROL__HDMI_NULL_SEND_MASK | /* send null packets when required */
+ HDMI_VBI_PACKET_CONTROL__HDMI_GC_SEND_MASK | /* send general control packets */
+ HDMI_VBI_PACKET_CONTROL__HDMI_GC_CONT_MASK); /* send general control packets every frame */
+
+ WREG32(mmHDMI_INFOFRAME_CONTROL0 + offset,
+ HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_SEND_MASK | /* enable audio info frames (frames won't be set until audio is enabled) */
+ HDMI_INFOFRAME_CONTROL0__HDMI_AUDIO_INFO_CONT_MASK); /* required for audio info values to be updated */
+
+ WREG32(mmAFMT_INFOFRAME_CONTROL0 + offset,
+ AFMT_INFOFRAME_CONTROL0__AFMT_AUDIO_INFO_UPDATE_MASK); /* required for audio info values to be updated */
+
+ WREG32(mmHDMI_INFOFRAME_CONTROL1 + offset,
+ (2 << HDMI_INFOFRAME_CONTROL1__HDMI_AUDIO_INFO_LINE__SHIFT)); /* anything other than 0 */
+
+ WREG32(mmHDMI_GC + offset, 0); /* unset HDMI_GC_AVMUTE */
+
+ WREG32(mmHDMI_AUDIO_PACKET_CONTROL + offset,
+ (1 << HDMI_AUDIO_PACKET_CONTROL__HDMI_AUDIO_DELAY_EN__SHIFT) | /* set the default audio delay */
+ (3 << HDMI_AUDIO_PACKET_CONTROL__HDMI_AUDIO_PACKETS_PER_LINE__SHIFT)); /* should be sufficient for all audio modes and small enough for all hblanks */
+
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL + offset,
+ AFMT_AUDIO_PACKET_CONTROL__AFMT_60958_CS_UPDATE_MASK); /* allow 60958 channel status fields to be updated */
+
+ /* fglrx clears sth in AFMT_AUDIO_PACKET_CONTROL2 here */
+
+ if (bpc > 8)
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + offset,
+ HDMI_ACR_PACKET_CONTROL__HDMI_ACR_AUTO_SEND_MASK); /* allow hw to send ACR packets when required */
+ else
+ WREG32(mmHDMI_ACR_PACKET_CONTROL + offset,
+ HDMI_ACR_PACKET_CONTROL__HDMI_ACR_SOURCE_MASK | /* select SW CTS value */
+ HDMI_ACR_PACKET_CONTROL__HDMI_ACR_AUTO_SEND_MASK); /* allow hw to send ACR packets when required */
+
+ dce_v8_0_afmt_update_ACR(encoder, mode->clock);
+
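+ /* assign IEC 60958 channel status channel numbers for up to eight channels */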
+ WREG32(mmAFMT_60958_0 + offset,
+ (1 << AFMT_60958_0__AFMT_60958_CS_CHANNEL_NUMBER_L__SHIFT));
+
+ WREG32(mmAFMT_60958_1 + offset,
+ (2 << AFMT_60958_1__AFMT_60958_CS_CHANNEL_NUMBER_R__SHIFT));
+
+ WREG32(mmAFMT_60958_2 + offset,
+ (3 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_2__SHIFT) |
+ (4 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_3__SHIFT) |
+ (5 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_4__SHIFT) |
+ (6 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_5__SHIFT) |
+ (7 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_6__SHIFT) |
+ (8 << AFMT_60958_2__AFMT_60958_CS_CHANNEL_NUMBER_7__SHIFT));
+
+ dce_v8_0_audio_write_speaker_allocation(encoder);
+
+ WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + offset,
+ (0xff << AFMT_AUDIO_PACKET_CONTROL2__AFMT_AUDIO_CHANNEL_ENABLE__SHIFT));
+
+ dce_v8_0_afmt_audio_select_pin(encoder);
+ dce_v8_0_audio_write_sad_regs(encoder);
+ dce_v8_0_audio_write_latency_fields(encoder, mode);
+
+ err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
+ if (err < 0) {
+ DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
+ if (err < 0) {
+ DRM_ERROR("failed to pack AVI infoframe: %zd\n", err);
+ return;
+ }
+
+ dce_v8_0_afmt_update_avi_infoframe(encoder, buffer, sizeof(buffer));
+
+ WREG32_OR(mmHDMI_INFOFRAME_CONTROL0 + offset,
+ HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_SEND_MASK | /* enable AVI info frames */
+ HDMI_INFOFRAME_CONTROL0__HDMI_AVI_INFO_CONT_MASK); /* required for AVI info values to be updated */
+
+ WREG32_P(mmHDMI_INFOFRAME_CONTROL1 + offset,
+ (2 << HDMI_INFOFRAME_CONTROL1__HDMI_AVI_INFO_LINE__SHIFT), /* anything other than 0 */
+ ~HDMI_INFOFRAME_CONTROL1__HDMI_AVI_INFO_LINE_MASK);
+
+ WREG32_OR(mmAFMT_AUDIO_PACKET_CONTROL + offset,
+ AFMT_AUDIO_PACKET_CONTROL__AFMT_AUDIO_SAMPLE_SEND_MASK); /* send audio packets */
+
+ WREG32(mmAFMT_RAMP_CONTROL0 + offset, 0x00FFFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL1 + offset, 0x007FFFFF);
+ WREG32(mmAFMT_RAMP_CONTROL2 + offset, 0x00000001);
+ WREG32(mmAFMT_RAMP_CONTROL3 + offset, 0x00000001);
+
+ /* enable audio after setting up hw */
+ dce_v8_0_audio_enable(adev, dig->afmt->pin, true);
+}
+
+static void dce_v8_0_afmt_enable(struct drm_encoder *encoder, bool enable)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ if (!dig || !dig->afmt)
+ return;
+
+ /* Silent, r600_hdmi_enable will raise WARN for us */
+ if (enable && dig->afmt->enabled)
+ return;
+ if (!enable && !dig->afmt->enabled)
+ return;
+
+ if (!enable && dig->afmt->pin) {
+ dce_v8_0_audio_enable(adev, dig->afmt->pin, false);
+ dig->afmt->pin = NULL;
+ }
+
+ dig->afmt->enabled = enable;
+
+ DRM_DEBUG("%sabling AFMT interface @ 0x%04X for encoder 0x%x\n",
+ enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
+}
+
+static int dce_v8_0_afmt_init(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++)
+ adev->mode_info.afmt[i] = NULL;
+
+ /* DCE8 has audio blocks tied to DIG encoders */
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ adev->mode_info.afmt[i] = kzalloc(sizeof(struct amdgpu_afmt), GFP_KERNEL);
+ if (adev->mode_info.afmt[i]) {
+ adev->mode_info.afmt[i]->offset = dig_offsets[i];
+ adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->mode_info.num_dig; i++) {
+ kfree(adev->mode_info.afmt[i]);
+ adev->mode_info.afmt[i] = NULL;
+ }
+}
+
+static const u32 vga_control_regs[6] = {
+ mmD1VGA_CONTROL,
+ mmD2VGA_CONTROL,
+ mmD3VGA_CONTROL,
+ mmD4VGA_CONTROL,
+ mmD5VGA_CONTROL,
+ mmD6VGA_CONTROL,
+};
+
+static void dce_v8_0_vga_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 vga_control;
+
+ vga_control = RREG32(vga_control_regs[amdgpu_crtc->crtc_id]) & ~1;
+ if (enable)
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control | 1);
+ else
+ WREG32(vga_control_regs[amdgpu_crtc->crtc_id], vga_control);
+}
+
+static void dce_v8_0_grph_enable(struct drm_crtc *crtc, bool enable)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ if (enable)
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 1);
+ else
+ WREG32(mmGRPH_ENABLE + amdgpu_crtc->crtc_offset, 0);
+}
+
+static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, int atomic)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct drm_framebuffer *target_fb;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *abo;
+ uint64_t fb_location, tiling_flags;
+ uint32_t fb_format, fb_pitch_pixels;
+ u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+ u32 pipe_config;
+ u32 viewport_w, viewport_h;
+ int r;
+ bool bypass_lut = false;
+
+ /* no fb bound */
+ if (!atomic && !crtc->primary->fb) {
+ DRM_DEBUG_KMS("No FB bound\n");
+ return 0;
+ }
+
+ if (atomic)
+ target_fb = fb;
+ else
+ target_fb = crtc->primary->fb;
+
+ /* If atomic, assume fb object is pinned & idle & fenced and
+ * just update base pointers
+ */
+ obj = target_fb->obj[0];
+ abo = gem_to_amdgpu_bo(obj);
+ r = amdgpu_bo_reserve(abo, false);
+ if (unlikely(r != 0))
+ return r;
+
+ if (!atomic) {
+ abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
+ if (unlikely(r != 0)) {
+ amdgpu_bo_unreserve(abo);
+ return -EINVAL;
+ }
+ }
+ fb_location = amdgpu_bo_gpu_offset(abo);
+
+ amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
+ amdgpu_bo_unreserve(abo);
+
+ pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+
+ switch (target_fb->format->format) {
+ case DRM_FORMAT_C8:
+ fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ break;
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_BGRX5551:
+ case DRM_FORMAT_BGRA5551:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_RGB565:
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ case DRM_FORMAT_XRGB2101010:
+ case DRM_FORMAT_ARGB2101010:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ /* Greater than 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_BGRX1010102:
+ case DRM_FORMAT_BGRA1010102:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ /* Greater than 8 bpc fb needs to bypass hw-lut to retain precision */
+ bypass_lut = true;
+ break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
+ default:
+ DRM_ERROR("Unsupported screen format %p4cc\n",
+ &target_fb->format->format);
+ return -EINVAL;
+ }
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_2D_TILED_THIN1) {
+ unsigned bankw, bankh, mtaspect, tile_split, num_banks;
+
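+ /* pull the 2D macro-tile parameters out of the bo tiling flags */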
+ bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
+ num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+
+ fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT);
+ fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT);
+ fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT);
+ fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT);
+ fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT);
+ fb_format |= (DISPLAY_MICRO_TILING << GRPH_CONTROL__GRPH_MICRO_TILE_MODE__SHIFT);
+ } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
+ fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ }
+
+ fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT);
+
+ dce_v8_0_vga_enable(crtc, false);
+
+ /* Make sure surface address is updated at vertical blank rather than
+ * horizontal blank
+ */
+ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(fb_location));
+ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+ WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
+
+ /*
+ * The LUT only has 256 slots for indexing by an 8 bpc fb. Bypass the LUT
+ * for > 8 bpc scanout to avoid truncation of fb indices to 8 msb's, to
+ * retain the full precision throughout the pipeline.
+ */
+ WREG32_P(mmGRPH_LUT_10BIT_BYPASS_CONTROL + amdgpu_crtc->crtc_offset,
+ (bypass_lut ? LUT_10BIT_BYPASS_EN : 0),
+ ~LUT_10BIT_BYPASS_EN);
+
+ if (bypass_lut)
+ DRM_DEBUG_KMS("Bypassing hardware LUT due to 10 bit fb scanout.\n");
+
+ WREG32(mmGRPH_SURFACE_OFFSET_X + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_SURFACE_OFFSET_Y + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_Y_START + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmGRPH_X_END + amdgpu_crtc->crtc_offset, target_fb->width);
+ WREG32(mmGRPH_Y_END + amdgpu_crtc->crtc_offset, target_fb->height);
+
+ fb_pitch_pixels = target_fb->pitches[0] / target_fb->format->cpp[0];
+ WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, fb_pitch_pixels);
+
+ dce_v8_0_grph_enable(crtc, true);
+
+ WREG32(mmLB_DESKTOP_HEIGHT + amdgpu_crtc->crtc_offset,
+ target_fb->height);
+
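+ /* align the viewport start to 4 pixels horizontally and 2 lines vertically */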
+ x &= ~3;
+ y &= ~1;
+ WREG32(mmVIEWPORT_START + amdgpu_crtc->crtc_offset,
+ (x << 16) | y);
+ viewport_w = crtc->mode.hdisplay;
+ viewport_h = (crtc->mode.vdisplay + 1) & ~1;
+ WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset,
+ (viewport_w << 16) | viewport_h);
+
+ /* set pageflip to happen anywhere in vblank interval */
+ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
+
+ if (!atomic && fb && fb != crtc->primary->fb) {
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r != 0))
+ return r;
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+
+ /* Bytes per pixel may have changed */
+ dce_v8_0_bandwidth_update(adev);
+
+ return 0;
+}
+
+static void dce_v8_0_set_interleave(struct drm_crtc *crtc,
+ struct drm_display_mode *mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+ WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset,
+ LB_DATA_FORMAT__INTERLEAVE_EN__SHIFT);
+ else
+ WREG32(mmLB_DATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
+}
+
+static void dce_v8_0_crtc_load_lut(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u16 *r, *g, *b;
+ int i;
+
+ DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
+
+ WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
+ ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
+ (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
+ WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
+ PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
+ WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
+ PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
+ WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
+ (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
+
+ WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_BLACK_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_BLACK_OFFSET_RED + amdgpu_crtc->crtc_offset, 0);
+
+ WREG32(mmDC_LUT_WHITE_OFFSET_BLUE + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_GREEN + amdgpu_crtc->crtc_offset, 0xffff);
+ WREG32(mmDC_LUT_WHITE_OFFSET_RED + amdgpu_crtc->crtc_offset, 0xffff);
+
+ WREG32(mmDC_LUT_RW_MODE + amdgpu_crtc->crtc_offset, 0);
+ WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007);
+
+ WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0);
+ r = crtc->gamma_store;
+ g = r + crtc->gamma_size;
+ b = g + crtc->gamma_size;
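+ /* pack the top 10 bits of each 16-bit gamma entry into one 30-bit RGB word */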
+ for (i = 0; i < 256; i++) {
+ WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset,
+ ((*r++ & 0xffc0) << 14) |
+ ((*g++ & 0xffc0) << 4) |
+ (*b++ >> 6));
+ }
+
+ WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
+ WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
+ ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
+ (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
+ WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
+ ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
+ (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
+ WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
+ ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
+ (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
+ /* XXX match this to the depth of the crtc fmt block, move to modeset? */
+ WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
+ /* XXX this only needs to be programmed once per crtc at startup,
+ * not sure where the best place for it is
+ */
+ WREG32(mmALPHA_CONTROL + amdgpu_crtc->crtc_offset,
+ ALPHA_CONTROL__CURSOR_ALPHA_BLND_ENA_MASK);
+}
+
+static int dce_v8_0_pick_dig_encoder(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ if (dig->linkb)
+ return 1;
+ else
+ return 0;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ if (dig->linkb)
+ return 3;
+ else
+ return 2;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ if (dig->linkb)
+ return 5;
+ else
+ return 4;
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ return 6;
+ default:
+ DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
+ return 0;
+ }
+}
+
+/**
+ * dce_v8_0_pick_pll - Allocate a PPLL for use by the crtc.
+ *
+ * @crtc: drm crtc
+ *
+ * Returns the PPLL (Pixel PLL) to be used by the crtc. For DP monitors
+ * a single PPLL can be used for all DP crtcs/encoders. For non-DP
+ * monitors a dedicated PPLL must be used. If a particular board has
+ * an external DP PLL, return ATOM_PPLL_INVALID to skip PLL programming
+ * as there is no need to program the PLL itself. If we are not able to
+ * allocate a PLL, return ATOM_PPLL_INVALID to skip PLL programming to
+ * avoid messing up an existing monitor.
+ *
+ * Asic specific PLL information
+ *
+ * DCE 8.x
+ * KB/KV
+ * - PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP)
+ * CI
+ * - PPLL0, PPLL1, PPLL2 are available for all UNIPHY (both DP and non-DP) and DAC
+ *
+ */
+static u32 dce_v8_0_pick_pll(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ u32 pll_in_use;
+ int pll;
+
+ if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) {
+ if (adev->clock.dp_extclk)
+ /* skip PPLL programming if using ext clock */
+ return ATOM_PPLL_INVALID;
+ else {
+ /* use the same PPLL for all DP monitors */
+ pll = amdgpu_pll_get_shared_dp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+ } else {
+ /* use the same PPLL for all monitors with the same clock */
+ pll = amdgpu_pll_get_shared_nondp_ppll(crtc);
+ if (pll != ATOM_PPLL_INVALID)
+ return pll;
+ }
+ /* otherwise, pick one of the plls */
+ if ((adev->asic_type == CHIP_KABINI) ||
+ (adev->asic_type == CHIP_MULLINS)) {
+ /* KB/ML has PPLL1 and PPLL2 */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+ } else {
+ /* CI/KV has PPLL0, PPLL1, and PPLL2 */
+ pll_in_use = amdgpu_pll_get_use_mask(crtc);
+ if (!(pll_in_use & (1 << ATOM_PPLL2)))
+ return ATOM_PPLL2;
+ if (!(pll_in_use & (1 << ATOM_PPLL1)))
+ return ATOM_PPLL1;
+ if (!(pll_in_use & (1 << ATOM_PPLL0)))
+ return ATOM_PPLL0;
+ DRM_ERROR("unable to allocate a PPLL\n");
+ return ATOM_PPLL_INVALID;
+ }
+ return ATOM_PPLL_INVALID;
+}
+
+static void dce_v8_0_lock_cursor(struct drm_crtc *crtc, bool lock)
+{
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ uint32_t cur_lock;
+
+ cur_lock = RREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset);
+ if (lock)
+ cur_lock |= CUR_UPDATE__CURSOR_UPDATE_LOCK_MASK;
+ else
+ cur_lock &= ~CUR_UPDATE__CURSOR_UPDATE_LOCK_MASK;
+ WREG32(mmCUR_UPDATE + amdgpu_crtc->crtc_offset, cur_lock);
+}
+
+static void dce_v8_0_hide_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+}
+
+static void dce_v8_0_show_cursor(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+
+ WREG32(mmCUR_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
+ upper_32_bits(amdgpu_crtc->cursor_addr));
+ WREG32(mmCUR_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
+ lower_32_bits(amdgpu_crtc->cursor_addr));
+
+ WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
+ CUR_CONTROL__CURSOR_EN_MASK |
+ (CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
+ (CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
+}
+
+static int dce_v8_0_cursor_move_locked(struct drm_crtc *crtc,
+ int x, int y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_device *adev = drm_to_adev(crtc->dev);
+ int xorigin = 0, yorigin = 0;
+
+ amdgpu_crtc->cursor_x = x;
+ amdgpu_crtc->cursor_y = y;
+
+ /* avivo cursors are offset into the total surface */
+ x += crtc->x;
+ y += crtc->y;
+ DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
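+ /* clamp negative positions to zero and carry the remaining offset in the cursor hot spot */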
+ if (x < 0) {
+ xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1);
+ x = 0;
+ }
+ if (y < 0) {
+ yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1);
+ y = 0;
+ }
+
+ WREG32(mmCUR_POSITION + amdgpu_crtc->crtc_offset, (x << 16) | y);
+ WREG32(mmCUR_HOT_SPOT + amdgpu_crtc->crtc_offset, (xorigin << 16) | yorigin);
+ WREG32(mmCUR_SIZE + amdgpu_crtc->crtc_offset,
+ ((amdgpu_crtc->cursor_width - 1) << 16) | (amdgpu_crtc->cursor_height - 1));
+
+ return 0;
+}
+
+static int dce_v8_0_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int ret;
+
+ dce_v8_0_lock_cursor(crtc, true);
+ ret = dce_v8_0_cursor_move_locked(crtc, x, y);
+ dce_v8_0_lock_cursor(crtc, false);
+
+ return ret;
+}
+
+static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height,
+ int32_t hot_x,
+ int32_t hot_y)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *aobj;
+ int ret;
+
+ if (!handle) {
+ /* turn off cursor */
+ dce_v8_0_hide_cursor(crtc);
+ obj = NULL;
+ goto unpin;
+ }
+
+ if ((width > amdgpu_crtc->max_cursor_width) ||
+ (height > amdgpu_crtc->max_cursor_height)) {
+ DRM_ERROR("bad cursor width or height %d x %d\n", width, height);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id);
+ return -ENOENT;
+ }
+
+ aobj = gem_to_amdgpu_bo(obj);
+ ret = amdgpu_bo_reserve(aobj, false);
+ if (ret != 0) {
+ drm_gem_object_put(obj);
+ return ret;
+ }
+
+ aobj->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_bo_unreserve(aobj);
+ if (ret) {
+ DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
+ drm_gem_object_put(obj);
+ return ret;
+ }
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+
+ dce_v8_0_lock_cursor(crtc, true);
+
+ if (width != amdgpu_crtc->cursor_width ||
+ height != amdgpu_crtc->cursor_height ||
+ hot_x != amdgpu_crtc->cursor_hot_x ||
+ hot_y != amdgpu_crtc->cursor_hot_y) {
+ int x, y;
+
+ x = amdgpu_crtc->cursor_x + amdgpu_crtc->cursor_hot_x - hot_x;
+ y = amdgpu_crtc->cursor_y + amdgpu_crtc->cursor_hot_y - hot_y;
+
+ dce_v8_0_cursor_move_locked(crtc, x, y);
+
+ amdgpu_crtc->cursor_width = width;
+ amdgpu_crtc->cursor_height = height;
+ amdgpu_crtc->cursor_hot_x = hot_x;
+ amdgpu_crtc->cursor_hot_y = hot_y;
+ }
+
+ dce_v8_0_show_cursor(crtc);
+ dce_v8_0_lock_cursor(crtc, false);
+
+unpin:
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ ret = amdgpu_bo_reserve(aobj, true);
+ if (likely(ret == 0)) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
+ drm_gem_object_put(amdgpu_crtc->cursor_bo);
+ }
+
+ amdgpu_crtc->cursor_bo = obj;
+ return 0;
+}
+
+static void dce_v8_0_cursor_reset(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo) {
+ dce_v8_0_lock_cursor(crtc, true);
+
+ dce_v8_0_cursor_move_locked(crtc, amdgpu_crtc->cursor_x,
+ amdgpu_crtc->cursor_y);
+
+ dce_v8_0_show_cursor(crtc);
+
+ dce_v8_0_lock_cursor(crtc, false);
+ }
+}
+
+static int dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+ u16 *blue, uint32_t size,
+ struct drm_modeset_acquire_ctx *ctx)
+{
+ dce_v8_0_crtc_load_lut(crtc);
+
+ return 0;
+}
+
+static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(amdgpu_crtc);
+}
+
+static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = {
+ .cursor_set2 = dce_v8_0_crtc_cursor_set2,
+ .cursor_move = dce_v8_0_crtc_cursor_move,
+ .gamma_set = dce_v8_0_crtc_gamma_set,
+ .set_config = amdgpu_display_crtc_set_config,
+ .destroy = dce_v8_0_crtc_destroy,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
+ .get_vblank_counter = amdgpu_get_vblank_counter_kms,
+ .enable_vblank = amdgpu_enable_vblank_kms,
+ .disable_vblank = amdgpu_disable_vblank_kms,
+ .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
+};
+
+static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ unsigned type;
+
+ switch (mode) {
+ case DRM_MODE_DPMS_ON:
+ amdgpu_crtc->enabled = true;
+ amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
+ dce_v8_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
+ dce_v8_0_vga_enable(crtc, false);
+ /* Make sure VBLANK and PFLIP interrupts are still enabled */
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
+ amdgpu_irq_update(adev, &adev->crtc_irq, type);
+ amdgpu_irq_update(adev, &adev->pageflip_irq, type);
+ drm_crtc_vblank_on(crtc);
+ dce_v8_0_crtc_load_lut(crtc);
+ break;
+ case DRM_MODE_DPMS_STANDBY:
+ case DRM_MODE_DPMS_SUSPEND:
+ case DRM_MODE_DPMS_OFF:
+ drm_crtc_vblank_off(crtc);
+ if (amdgpu_crtc->enabled) {
+ dce_v8_0_vga_enable(crtc, true);
+ amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
+ dce_v8_0_vga_enable(crtc, false);
+ }
+ amdgpu_atombios_crtc_enable(crtc, ATOM_DISABLE);
+ amdgpu_crtc->enabled = false;
+ break;
+ }
+ /* adjust pm to dpms */
+ amdgpu_dpm_compute_clocks(adev);
+}
+
+static void dce_v8_0_crtc_prepare(struct drm_crtc *crtc)
+{
+ /* disable crtc pair power gating before programming */
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_DISABLE);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_ENABLE);
+ dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void dce_v8_0_crtc_commit(struct drm_crtc *crtc)
+{
+ dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_crtc_lock(crtc, ATOM_DISABLE);
+}
+
+static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ struct amdgpu_atom_ss ss;
+ int i;
+
+ dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+ if (crtc->primary->fb) {
+ int r;
+ struct amdgpu_bo *abo;
+
+ abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
+ r = amdgpu_bo_reserve(abo, true);
+ if (unlikely(r))
+ DRM_ERROR("failed to reserve abo before unpin\n");
+ else {
+ amdgpu_bo_unpin(abo);
+ amdgpu_bo_unreserve(abo);
+ }
+ }
+ /* disable the GRPH */
+ dce_v8_0_grph_enable(crtc, false);
+
+ amdgpu_atombios_crtc_powergate(crtc, ATOM_ENABLE);
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (adev->mode_info.crtcs[i] &&
+ adev->mode_info.crtcs[i]->enabled &&
+ i != amdgpu_crtc->crtc_id &&
+ amdgpu_crtc->pll_id == adev->mode_info.crtcs[i]->pll_id) {
+ /* one other crtc is using this pll, don't
+ * turn off the pll
+ */
+ goto done;
+ }
+ }
+
+ switch (amdgpu_crtc->pll_id) {
+ case ATOM_PPLL1:
+ case ATOM_PPLL2:
+ /* disable the ppll */
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ case ATOM_PPLL0:
+ /* disable the ppll */
+ if ((adev->asic_type == CHIP_KAVERI) ||
+ (adev->asic_type == CHIP_BONAIRE) ||
+ (adev->asic_type == CHIP_HAWAII))
+ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id,
+ 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
+ break;
+ default:
+ break;
+ }
+done:
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+}
+
+static int dce_v8_0_crtc_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (!amdgpu_crtc->adjusted_clock)
+ return -EINVAL;
+
+ amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode);
+ amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode);
+ dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+ amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode);
+ amdgpu_atombios_crtc_scaler_setup(crtc);
+ dce_v8_0_cursor_reset(crtc);
+ /* update the hw version for dpm */
+ amdgpu_crtc->hw_mode = *adjusted_mode;
+
+ return 0;
+}
+
+static bool dce_v8_0_crtc_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_device *dev = crtc->dev;
+ struct drm_encoder *encoder;
+
+ /* assign the encoder to the amdgpu crtc to avoid repeated lookups later */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->crtc == crtc) {
+ amdgpu_crtc->encoder = encoder;
+ amdgpu_crtc->connector = amdgpu_get_connector_for_encoder(encoder);
+ break;
+ }
+ }
+ if ((amdgpu_crtc->encoder == NULL) || (amdgpu_crtc->connector == NULL)) {
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ return false;
+ }
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ return false;
+ if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
+ return false;
+ /* pick pll */
+ amdgpu_crtc->pll_id = dce_v8_0_pick_pll(crtc);
+ /* if we can't get a PPLL for a non-DP encoder, fail */
+ if ((amdgpu_crtc->pll_id == ATOM_PPLL_INVALID) &&
+ !ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder)))
+ return false;
+
+ return true;
+}
+
+static int dce_v8_0_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)
+{
+ return dce_v8_0_crtc_do_set_base(crtc, old_fb, x, y, 0);
+}
+
+static int dce_v8_0_crtc_set_base_atomic(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ int x, int y, enum mode_set_atomic state)
+{
+ return dce_v8_0_crtc_do_set_base(crtc, fb, x, y, 1);
+}
+
+static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = {
+ .dpms = dce_v8_0_crtc_dpms,
+ .mode_fixup = dce_v8_0_crtc_mode_fixup,
+ .mode_set = dce_v8_0_crtc_mode_set,
+ .mode_set_base = dce_v8_0_crtc_set_base,
+ .mode_set_base_atomic = dce_v8_0_crtc_set_base_atomic,
+ .prepare = dce_v8_0_crtc_prepare,
+ .commit = dce_v8_0_crtc_commit,
+ .disable = dce_v8_0_crtc_disable,
+ .get_scanout_position = amdgpu_crtc_get_scanout_position,
+};
+
+static void dce_v8_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+}
+
+static const struct drm_plane_helper_funcs dce_v8_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v8_0_panic_flush,
+};
+
+static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
+{
+ struct amdgpu_crtc *amdgpu_crtc;
+
+ amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) +
+ (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
+ if (amdgpu_crtc == NULL)
+ return -ENOMEM;
+
+ drm_crtc_init(adev_to_drm(adev), &amdgpu_crtc->base, &dce_v8_0_crtc_funcs);
+
+ drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
+ amdgpu_crtc->crtc_id = index;
+ adev->mode_info.crtcs[index] = amdgpu_crtc;
+
+ amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH;
+ amdgpu_crtc->max_cursor_height = CIK_CURSOR_HEIGHT;
+ adev_to_drm(adev)->mode_config.cursor_width = amdgpu_crtc->max_cursor_width;
+ adev_to_drm(adev)->mode_config.cursor_height = amdgpu_crtc->max_cursor_height;
+
+ amdgpu_crtc->crtc_offset = crtc_offsets[amdgpu_crtc->crtc_id];
+
+ amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
+ amdgpu_crtc->adjusted_clock = 0;
+ amdgpu_crtc->encoder = NULL;
+ amdgpu_crtc->connector = NULL;
+ drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v8_0_drm_primary_plane_helper_funcs);
+
+ return 0;
+}
+
+static int dce_v8_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg;
+ adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg;
+
+ dce_v8_0_set_display_funcs(adev);
+
+ adev->mode_info.num_crtc = dce_v8_0_get_num_crtc(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 6;
+ break;
+ case CHIP_KAVERI:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 7;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ adev->mode_info.num_hpd = 6;
+ adev->mode_info.num_dig = 6; /* ? */
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ dce_v8_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+static int dce_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r, i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ if (r)
+ return r;
+ }
+
+ for (i = 8; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ if (r)
+ return r;
+ }
+
+ /* HPD hotplug */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.funcs = &amdgpu_mode_funcs;
+
+ adev_to_drm(adev)->mode_config.async_page_flip = true;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ adev_to_drm(adev)->mode_config.preferred_depth = 24;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+
+ adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+ r = amdgpu_display_modeset_create_props(adev);
+ if (r)
+ return r;
+
+ adev_to_drm(adev)->mode_config.max_width = 16384;
+ adev_to_drm(adev)->mode_config.max_height = 16384;
+
+ /* allocate crtcs */
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ r = dce_v8_0_crtc_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
+ amdgpu_display_print_display_setup(adev_to_drm(adev));
+ else
+ return -EINVAL;
+
+ /* setup afmt */
+ r = dce_v8_0_afmt_init(adev);
+ if (r)
+ return r;
+
+ r = dce_v8_0_audio_init(adev);
+ if (r)
+ return r;
+
+ /* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
+ adev_to_drm(adev)->vblank_disable_immediate = true;
+
+ r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+ if (r)
+ return r;
+
+ /* Pre-DCE11 */
+ INIT_DELAYED_WORK(&adev->hotplug_work,
+ amdgpu_display_hotplug_work_func);
+
+ drm_kms_helper_poll_init(adev_to_drm(adev));
+
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
+}
+
+static int dce_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ drm_edid_free(adev->mode_info.bios_hardcoded_edid);
+
+ drm_kms_helper_poll_fini(adev_to_drm(adev));
+
+ dce_v8_0_audio_fini(adev);
+
+ dce_v8_0_afmt_fini(adev);
+
+ drm_mode_config_cleanup(adev_to_drm(adev));
+ adev->mode_info.mode_config_initialized = false;
+
+ return 0;
+}
+
+static int dce_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* disable vga render */
+ dce_v8_0_set_vga_render_state(adev, false);
+ /* init dig PHYs, disp eng pll */
+ amdgpu_atombios_encoder_init_dig(adev);
+ amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk);
+
+ /* initialize hpd */
+ dce_v8_0_hpd_init(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v8_0_pageflip_interrupt_init(adev);
+
+ return 0;
+}
+
+static int dce_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ dce_v8_0_hpd_fini(adev);
+
+ for (i = 0; i < adev->mode_info.audio.num_pins; i++) {
+ dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
+ }
+
+ dce_v8_0_pageflip_interrupt_fini(adev);
+
+ flush_delayed_work(&adev->hotplug_work);
+
+ return 0;
+}
+
+static int dce_v8_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_display_suspend_helper(adev);
+ if (r)
+ return r;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
+ return dce_v8_0_hw_fini(ip_block);
+}
+
+static int dce_v8_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
+ ret = dce_v8_0_hw_init(ip_block);
+
+ /* turn on the BL */
+ if (adev->mode_info.bl_encoder) {
+ u8 bl_level = amdgpu_display_backlight_get_level(adev,
+ adev->mode_info.bl_encoder);
+ amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
+ bl_level);
+ }
+ if (ret)
+ return ret;
+
+ return amdgpu_display_resume_helper(adev);
+}
+
+static bool dce_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ return true;
+}
+
+static int dce_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (dce_v8_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static void dce_v8_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg_block, lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (crtc) {
+ case 0:
+ reg_block = CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ reg_block = CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ reg_block = CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ reg_block = CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ reg_block = CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ reg_block = CRTC5_REGISTER_OFFSET;
+ break;
+ default:
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + reg_block);
+ lb_interrupt_mask &= ~LB_INTERRUPT_MASK__VBLANK_INTERRUPT_MASK_MASK;
+ WREG32(mmLB_INTERRUPT_MASK + reg_block, lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + reg_block);
+ lb_interrupt_mask |= LB_INTERRUPT_MASK__VBLANK_INTERRUPT_MASK_MASK;
+ WREG32(mmLB_INTERRUPT_MASK + reg_block, lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
+ int crtc,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg_block, lb_interrupt_mask;
+
+ if (crtc >= adev->mode_info.num_crtc) {
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (crtc) {
+ case 0:
+ reg_block = CRTC0_REGISTER_OFFSET;
+ break;
+ case 1:
+ reg_block = CRTC1_REGISTER_OFFSET;
+ break;
+ case 2:
+ reg_block = CRTC2_REGISTER_OFFSET;
+ break;
+ case 3:
+ reg_block = CRTC3_REGISTER_OFFSET;
+ break;
+ case 4:
+ reg_block = CRTC4_REGISTER_OFFSET;
+ break;
+ case 5:
+ reg_block = CRTC5_REGISTER_OFFSET;
+ break;
+ default:
+ DRM_DEBUG("invalid crtc %d\n", crtc);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + reg_block);
+ lb_interrupt_mask &= ~LB_INTERRUPT_MASK__VLINE_INTERRUPT_MASK_MASK;
+ WREG32(mmLB_INTERRUPT_MASK + reg_block, lb_interrupt_mask);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ lb_interrupt_mask = RREG32(mmLB_INTERRUPT_MASK + reg_block);
+ lb_interrupt_mask |= LB_INTERRUPT_MASK__VLINE_INTERRUPT_MASK_MASK;
+ WREG32(mmLB_INTERRUPT_MASK + reg_block, lb_interrupt_mask);
+ break;
+ default:
+ break;
+ }
+}
+
+static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 dc_hpd_int_cntl;
+
+ if (type >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", type);
+ return 0;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
+ dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
+ dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CRTC_IRQ_VBLANK1:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK2:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK3:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK4:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK5:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VBLANK6:
+ dce_v8_0_set_crtc_vblank_interrupt_state(adev, 5, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE1:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 0, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE2:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 1, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE3:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 2, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE4:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 3, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE5:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 4, state);
+ break;
+ case AMDGPU_CRTC_IRQ_VLINE6:
+ dce_v8_0_set_crtc_vline_interrupt_state(adev, 5, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
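+ /* crtc interrupt src_ids are registered as crtc index + 1 in dce_v8_0_sw_init() */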
+ unsigned crtc = entry->src_id - 1;
+ uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ crtc);
+
+ switch (entry->src_data[0]) {
+ case 0: /* vblank */
+ if (disp_int & interrupt_status_offsets[crtc].vblank)
+ WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ if (amdgpu_irq_enabled(adev, source, irq_type)) {
+ drm_handle_vblank(adev_to_drm(adev), crtc);
+ }
+ DRM_DEBUG("IH: D%d vblank\n", crtc + 1);
+ break;
+ case 1: /* vline */
+ if (disp_int & interrupt_status_offsets[crtc].vline)
+ WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], LB_VLINE_STATUS__VLINE_ACK_MASK);
+ else
+ DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
+
+ DRM_DEBUG("IH: D%d vline\n", crtc + 1);
+ break;
+ default:
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 reg;
+
+ if (type >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", type);
+ return -EINVAL;
+ }
+
+ reg = RREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type]);
+ if (state == AMDGPU_IRQ_STATE_DISABLE)
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg & ~GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+ else
+ WREG32(mmGRPH_INTERRUPT_CONTROL + crtc_offsets[type],
+ reg | GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK);
+
+ return 0;
+}
+
+static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned long flags;
+ unsigned crtc_id;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_flip_work *works;
+
+ crtc_id = (entry->src_id - 8) >> 1;
+ amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
+
+ if (crtc_id >= adev->mode_info.num_crtc) {
+ DRM_ERROR("invalid pageflip crtc %d\n", crtc_id);
+ return -EINVAL;
+ }
+
+ if (RREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id]) &
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK)
+ WREG32(mmGRPH_INTERRUPT_STATUS + crtc_offsets[crtc_id],
+ GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK);
+
+ /* IRQ can fire while we are still in the initialization stage */
+ if (amdgpu_crtc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
+ works = amdgpu_crtc->pflip_works;
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
+ "AMDGPU_FLIP_SUBMITTED(%d)\n",
+ amdgpu_crtc->pflip_status,
+ AMDGPU_FLIP_SUBMITTED);
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+ return 0;
+ }
+
+ /* page flip completed. clean up */
+ amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
+ amdgpu_crtc->pflip_works = NULL;
+
+ /* wake up userspace */
+ if (works->event)
+ drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
+
+ spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
+
+ drm_crtc_vblank_put(&amdgpu_crtc->base);
+ schedule_work(&works->unpin_work);
+
+ return 0;
+}
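The interrupt-vector source IDs for pageflip events begin at 8 and, as the shift in dce_v8_0_pageflip_irq() suggests, appear to be spaced two apart per CRTC, so the handler recovers the CRTC index with (entry->src_id - 8) >> 1. The short sketch below is ordinary standalone userspace C, not driver code; it only exercises that mapping so the arithmetic can be checked in isolation.

    #include <stdio.h>

    /* Mirror of the mapping used in dce_v8_0_pageflip_irq():
     * pageflip source IDs start at 8, two per CRTC, so
     * src_id 8 -> CRTC 0, 10 -> CRTC 1, 12 -> CRTC 2, ...
     */
    static unsigned int pageflip_src_id_to_crtc(unsigned int src_id)
    {
        return (src_id - 8) >> 1;
    }

    int main(void)
    {
        unsigned int src_id;

        for (src_id = 8; src_id <= 18; src_id += 2)
            printf("src_id %u -> crtc %u\n",
                   src_id, pageflip_src_id_to_crtc(src_id));

        return 0;
    }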
+
+static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t disp_int, mask;
+ unsigned hpd;
+
+ if (entry->src_data[0] >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]);
+ return 0;
+ }
+
+ hpd = entry->src_data[0];
+ disp_int = RREG32(interrupt_status_offsets[hpd].reg);
+ mask = interrupt_status_offsets[hpd].hpd;
+
+ if (disp_int & mask) {
+ dce_v8_0_hpd_int_ack(adev, hpd);
+ schedule_delayed_work(&adev->hotplug_work, 0);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
+ }
+
+ return 0;
+
+}
+
+static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
+ .name = "dce_v8_0",
+ .early_init = dce_v8_0_early_init,
+ .sw_init = dce_v8_0_sw_init,
+ .sw_fini = dce_v8_0_sw_fini,
+ .hw_init = dce_v8_0_hw_init,
+ .hw_fini = dce_v8_0_hw_fini,
+ .suspend = dce_v8_0_suspend,
+ .resume = dce_v8_0_resume,
+ .is_idle = dce_v8_0_is_idle,
+ .soft_reset = dce_v8_0_soft_reset,
+ .set_clockgating_state = dce_v8_0_set_clockgating_state,
+ .set_powergating_state = dce_v8_0_set_powergating_state,
+};
+
+static void
+dce_v8_0_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+
+ amdgpu_encoder->pixel_clock = adjusted_mode->clock;
+
+ /* need to call this here rather than in prepare() since we need some crtc info */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ /* set scaler clears this on some chips */
+ dce_v8_0_set_interleave(encoder->crtc, mode);
+
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) {
+ dce_v8_0_afmt_enable(encoder, true);
+ dce_v8_0_afmt_setmode(encoder, adjusted_mode);
+ }
+}
+
+static void dce_v8_0_encoder_prepare(struct drm_encoder *encoder)
+{
+ struct amdgpu_device *adev = drm_to_adev(encoder->dev);
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
+
+ if ((amdgpu_encoder->active_device &
+ (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+ (amdgpu_encoder_get_dp_bridge_encoder_id(encoder) !=
+ ENCODER_OBJECT_ID_NONE)) {
+ struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ if (dig) {
+ dig->dig_encoder = dce_v8_0_pick_dig_encoder(encoder);
+ if (amdgpu_encoder->active_device & ATOM_DEVICE_DFP_SUPPORT)
+ dig->afmt = adev->mode_info.afmt[dig->dig_encoder];
+ }
+ }
+
+ amdgpu_atombios_scratch_regs_lock(adev, true);
+
+ if (connector) {
+ struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
+ /* select the clock/data port if it uses a router */
+ if (amdgpu_connector->router.cd_valid)
+ amdgpu_i2c_router_select_cd_port(amdgpu_connector);
+
+ /* turn eDP panel on for mode set */
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+ amdgpu_atombios_encoder_set_edp_panel_power(connector,
+ ATOM_TRANSMITTER_ACTION_POWER_ON);
+ }
+
+ /* this is needed for the pll/ss setup to work correctly in some cases */
+ amdgpu_atombios_encoder_set_crtc_source(encoder);
+ /* set up the FMT blocks */
+ dce_v8_0_program_fmt(encoder);
+}
+
+static void dce_v8_0_encoder_commit(struct drm_encoder *encoder)
+{
+ struct drm_device *dev = encoder->dev;
+ struct amdgpu_device *adev = drm_to_adev(dev);
+
+ /* need to call this here as we need the crtc set up */
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+ amdgpu_atombios_scratch_regs_lock(adev, false);
+}
+
+static void dce_v8_0_encoder_disable(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ struct amdgpu_encoder_atom_dig *dig;
+
+ amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+
+ if (amdgpu_atombios_encoder_is_digital(encoder)) {
+ if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
+ dce_v8_0_afmt_enable(encoder, false);
+ dig = amdgpu_encoder->enc_priv;
+ dig->dig_encoder = -1;
+ }
+ amdgpu_encoder->active_device = 0;
+}
+
+/* these are handled by the primary encoders */
+static void dce_v8_0_ext_prepare(struct drm_encoder *encoder)
+{
+
+}
+
+static void dce_v8_0_ext_commit(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v8_0_ext_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+
+}
+
+static void dce_v8_0_ext_disable(struct drm_encoder *encoder)
+{
+
+}
+
+static void
+dce_v8_0_ext_dpms(struct drm_encoder *encoder, int mode)
+{
+
+}
+
+static const struct drm_encoder_helper_funcs dce_v8_0_ext_helper_funcs = {
+ .dpms = dce_v8_0_ext_dpms,
+ .prepare = dce_v8_0_ext_prepare,
+ .mode_set = dce_v8_0_ext_mode_set,
+ .commit = dce_v8_0_ext_commit,
+ .disable = dce_v8_0_ext_disable,
+ /* no detect for TMDS/LVDS yet */
+};
+
+static const struct drm_encoder_helper_funcs dce_v8_0_dig_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v8_0_encoder_prepare,
+ .mode_set = dce_v8_0_encoder_mode_set,
+ .commit = dce_v8_0_encoder_commit,
+ .disable = dce_v8_0_encoder_disable,
+ .detect = amdgpu_atombios_encoder_dig_detect,
+};
+
+static const struct drm_encoder_helper_funcs dce_v8_0_dac_helper_funcs = {
+ .dpms = amdgpu_atombios_encoder_dpms,
+ .mode_fixup = amdgpu_atombios_encoder_mode_fixup,
+ .prepare = dce_v8_0_encoder_prepare,
+ .mode_set = dce_v8_0_encoder_mode_set,
+ .commit = dce_v8_0_encoder_commit,
+ .detect = amdgpu_atombios_encoder_dac_detect,
+};
+
+static void dce_v8_0_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ amdgpu_atombios_encoder_fini_backlight(amdgpu_encoder);
+ kfree(amdgpu_encoder->enc_priv);
+ drm_encoder_cleanup(encoder);
+ kfree(amdgpu_encoder);
+}
+
+static const struct drm_encoder_funcs dce_v8_0_encoder_funcs = {
+ .destroy = dce_v8_0_encoder_destroy,
+};
+
+static void dce_v8_0_encoder_add(struct amdgpu_device *adev,
+ uint32_t encoder_enum,
+ uint32_t supported_device,
+ u16 caps)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_encoder *encoder;
+ struct amdgpu_encoder *amdgpu_encoder;
+
+ /* see if we already added it */
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ amdgpu_encoder = to_amdgpu_encoder(encoder);
+ if (amdgpu_encoder->encoder_enum == encoder_enum) {
+ amdgpu_encoder->devices |= supported_device;
+ return;
+ }
+
+ }
+
+ /* add a new one */
+ amdgpu_encoder = kzalloc(sizeof(struct amdgpu_encoder), GFP_KERNEL);
+ if (!amdgpu_encoder)
+ return;
+
+ encoder = &amdgpu_encoder->base;
+ switch (adev->mode_info.num_crtc) {
+ case 1:
+ encoder->possible_crtcs = 0x1;
+ break;
+ case 2:
+ default:
+ encoder->possible_crtcs = 0x3;
+ break;
+ case 4:
+ encoder->possible_crtcs = 0xf;
+ break;
+ case 6:
+ encoder->possible_crtcs = 0x3f;
+ break;
+ }
+
+ amdgpu_encoder->enc_priv = NULL;
+
+ amdgpu_encoder->encoder_enum = encoder_enum;
+ amdgpu_encoder->encoder_id = (encoder_enum & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+ amdgpu_encoder->devices = supported_device;
+ amdgpu_encoder->rmx_type = RMX_OFF;
+ amdgpu_encoder->underscan_type = UNDERSCAN_OFF;
+ amdgpu_encoder->is_ext_encoder = false;
+ amdgpu_encoder->caps = caps;
+
+ switch (amdgpu_encoder->encoder_id) {
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ drm_encoder_helper_add(encoder, &dce_v8_0_dac_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+ case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+ amdgpu_encoder->rmx_type = RMX_FULL;
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
+ } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ } else {
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
+ }
+ drm_encoder_helper_add(encoder, &dce_v8_0_dig_helper_funcs);
+ break;
+ case ENCODER_OBJECT_ID_SI170B:
+ case ENCODER_OBJECT_ID_CH7303:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOA:
+ case ENCODER_OBJECT_ID_EXTERNAL_SDVOB:
+ case ENCODER_OBJECT_ID_TITFP513:
+ case ENCODER_OBJECT_ID_VT1623:
+ case ENCODER_OBJECT_ID_HDMI_SI1930:
+ case ENCODER_OBJECT_ID_TRAVIS:
+ case ENCODER_OBJECT_ID_NUTMEG:
+ /* these are handled by the primary encoders */
+ amdgpu_encoder->is_ext_encoder = true;
+ if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_LVDS, NULL);
+ else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ else
+ drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
+ DRM_MODE_ENCODER_TMDS, NULL);
+ drm_encoder_helper_add(encoder, &dce_v8_0_ext_helper_funcs);
+ break;
+ }
+}
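encoder->possible_crtcs is a bitmask with one bit per CRTC the encoder may drive, and the switch in dce_v8_0_encoder_add() enumerates the DCE 8.x configurations (1, 2, 4 or 6 CRTCs) explicitly. The minimal standalone sketch below computes the same "all CRTCs allowed" mask generically; it is only an illustration of the bitmask shape, not driver code.

    #include <stdio.h>
    #include <stdint.h>

    /* For N CRTCs the all-CRTCs-allowed mask is (1 << N) - 1, which
     * reproduces the 0x1 / 0x3 / 0xf / 0x3f constants in the switch above.
     */
    static uint32_t all_crtcs_mask(unsigned int num_crtc)
    {
        return (num_crtc >= 32) ? 0xffffffffu : ((1u << num_crtc) - 1u);
    }

    int main(void)
    {
        const unsigned int configs[] = { 1, 2, 4, 6 };
        size_t i;

        for (i = 0; i < sizeof(configs) / sizeof(configs[0]); i++)
            printf("%u CRTCs -> possible_crtcs = 0x%x\n",
                   configs[i], all_crtcs_mask(configs[i]));

        return 0;
    }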
+
+static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
+ .bandwidth_update = &dce_v8_0_bandwidth_update,
+ .vblank_get_counter = &dce_v8_0_vblank_get_counter,
+ .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
+ .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
+ .hpd_sense = &dce_v8_0_hpd_sense,
+ .hpd_set_polarity = &dce_v8_0_hpd_set_polarity,
+ .hpd_get_gpio_reg = &dce_v8_0_hpd_get_gpio_reg,
+ .page_flip = &dce_v8_0_page_flip,
+ .page_flip_get_scanoutpos = &dce_v8_0_crtc_get_scanoutpos,
+ .add_encoder = &dce_v8_0_encoder_add,
+ .add_connector = &amdgpu_connector_add,
+};
+
+static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
+{
+ adev->mode_info.funcs = &dce_v8_0_display_funcs;
+}
+
+static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = {
+ .set = dce_v8_0_set_crtc_irq_state,
+ .process = dce_v8_0_crtc_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = {
+ .set = dce_v8_0_set_pageflip_irq_state,
+ .process = dce_v8_0_pageflip_irq,
+};
+
+static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = {
+ .set = dce_v8_0_set_hpd_irq_state,
+ .process = dce_v8_0_hpd_irq,
+};
+
+static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mode_info.num_crtc > 0)
+ adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc;
+ else
+ adev->crtc_irq.num_types = 0;
+ adev->crtc_irq.funcs = &dce_v8_0_crtc_irq_funcs;
+
+ adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
+ adev->pageflip_irq.funcs = &dce_v8_0_pageflip_irq_funcs;
+
+ adev->hpd_irq.num_types = adev->mode_info.num_hpd;
+ adev->hpd_irq.funcs = &dce_v8_0_hpd_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version dce_v8_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_2_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_3_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 3,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
+
+const struct amdgpu_ip_block_version dce_v8_5_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_DCE,
+ .major = 8,
+ .minor = 5,
+ .rev = 0,
+ .funcs = &dce_v8_0_ip_funcs,
+};
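The five dce_v8_X_ip_block descriptors above differ only in their minor version and all point at the same dce_v8_0_ip_funcs table; the per-ASIC setup code registers whichever descriptor matches the chip when it builds the device's IP-block list (for DCE 8.x this happens via amdgpu_device_ip_block_add() during CIK SoC init). The standalone sketch below uses made-up stand-in types, not the amdgpu structures, and only illustrates the "version descriptor selects a shared funcs table" pattern.

    #include <stdio.h>

    /* Illustrative stand-ins: only the fields needed for the lookup are
     * modeled.  This is a sketch of the pattern, not the amdgpu types.
     */
    struct ip_funcs { const char *name; };
    struct ip_block_version { int major, minor, rev; const struct ip_funcs *funcs; };

    static const struct ip_funcs dce_v8_funcs = { .name = "dce_v8_0" };

    static const struct ip_block_version dce_blocks[] = {
        { 8, 0, 0, &dce_v8_funcs },
        { 8, 1, 0, &dce_v8_funcs },
        { 8, 2, 0, &dce_v8_funcs },
        { 8, 3, 0, &dce_v8_funcs },
        { 8, 5, 0, &dce_v8_funcs },
    };

    static const struct ip_block_version *find_block(int major, int minor)
    {
        size_t i;

        for (i = 0; i < sizeof(dce_blocks) / sizeof(dce_blocks[0]); i++)
            if (dce_blocks[i].major == major && dce_blocks[i].minor == minor)
                return &dce_blocks[i];
        return NULL;
    }

    int main(void)
    {
        const struct ip_block_version *v = find_block(8, 2);

        if (v)
            printf("DCE %d.%d uses funcs table '%s'\n",
                   v->major, v->minor, v->funcs->name);
        return 0;
    }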
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
new file mode 100644
index 000000000000..13b802dd946a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DCE_V8_0_H__
+#define __DCE_V8_0_H__
+
+extern const struct amdgpu_ip_block_version dce_v8_0_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_1_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_2_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_3_ip_block;
+extern const struct amdgpu_ip_block_version dce_v8_5_ip_block;
+
+void dce_v8_0_disable_dce(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
new file mode 100644
index 000000000000..cd298556f7a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v1_7.h"
+
+#include "df/df_1_7_default.h"
+#include "df/df_1_7_offset.h"
+#include "df/df_1_7_sh_mask.h"
+
+static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
+
+static void df_v1_7_sw_init(struct amdgpu_device *adev)
+{
+ adev->df.hash_status.hash_64k = false;
+ adev->df.hash_status.hash_2m = false;
+ adev->df.hash_status.hash_1g = false;
+}
+
+static void df_v1_7_sw_fini(struct amdgpu_device *adev)
+{
+}
+
+static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+ tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+ } else
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+ mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
+ tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+ return tmp;
+}
+
+static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+ int fb_channel_number;
+
+ fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
+ if (fb_channel_number >= ARRAY_SIZE(df_v1_7_channel_number))
+ fb_channel_number = 0;
+
+ return df_v1_7_channel_number[fb_channel_number];
+}
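df_v1_7_get_hbm_channel_number() reads the raw IntLvNumChan encoding and uses it as an index into df_v1_7_channel_number[], falling back to entry 0 when the encoding is out of range. The standalone sketch below copies the table values from above and reproduces just that clamped lookup; everything else is illustrative.

    #include <stdio.h>
    #include <stdint.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    /* Table values copied from df_v1_7_channel_number above; out-of-range
     * encodings fall back to entry 0, as in the driver helper.
     */
    static const uint32_t channel_number[] = { 1, 2, 0, 4, 0, 8, 0, 16, 2 };

    static uint32_t hbm_channels_from_encoding(uint32_t fb_channel_encoding)
    {
        if (fb_channel_encoding >= ARRAY_SIZE(channel_number))
            fb_channel_encoding = 0;

        return channel_number[fb_channel_encoding];
    }

    int main(void)
    {
        printf("encoding 7  -> %u channels\n", hbm_channels_from_encoding(7));
        printf("encoding 42 -> %u channels (clamped)\n",
               hbm_channels_from_encoding(42));
        return 0;
    }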
+
+static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ /* Put DF on broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, true);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V1_7_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
+
+ /* Exit broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ u32 tmp;
+
+ /* AMD_CG_SUPPORT_DF_MGCG */
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY)
+ *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0,
+ ForceParWrRMW, enable);
+}
+
+const struct amdgpu_df_funcs df_v1_7_funcs = {
+ .sw_init = df_v1_7_sw_init,
+ .sw_fini = df_v1_7_sw_fini,
+ .enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
+ .get_fb_channel_number = df_v1_7_get_fb_channel_number,
+ .get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
+ .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating,
+ .get_clockgating_state = df_v1_7_get_clockgating_state,
+ .enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
new file mode 100644
index 000000000000..74621104c487
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V1_7_H__
+#define __DF_V1_7_H__
+
+#include "soc15_common.h"
+enum DF_V1_7_MGCG
+{
+ DF_V1_7_MGCG_DISABLE = 0,
+ DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY = 1,
+ DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY = 2,
+ DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY = 13,
+ DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY = 14,
+ DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY = 15
+};
+
+extern const struct amdgpu_df_funcs df_v1_7_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
new file mode 100644
index 000000000000..621aeca53880
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v3_6.h"
+
+#include "df/df_3_6_default.h"
+#include "df/df_3_6_offset.h"
+#include "df/df_3_6_sh_mask.h"
+
+#define DF_3_6_SMN_REG_INST_DIST 0x8
+#define DF_3_6_INST_CNT 8
+
+/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
+#define DF_V3_6_MAX_COUNTERS 4
+
+/* get flags from df perfmon config */
+#define DF_V3_6_GET_EVENT(x) (x & 0xFFUL)
+#define DF_V3_6_GET_INSTANCE(x) ((x >> 8) & 0xFFUL)
+#define DF_V3_6_GET_UNITMASK(x) ((x >> 16) & 0xFFUL)
+#define DF_V3_6_PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL
+
+static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
+ 16, 32, 0, 0, 0, 2, 4, 8};
+
+static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
+ uint32_t ficaa_val)
+{
+ unsigned long flags, address, data;
+ uint32_t ficadl_val, ficadh_val;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
+ WREG32(data, ficaa_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
+ ficadl_val = RREG32(data);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
+ ficadh_val = RREG32(data);
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
+}
+
+static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
+ uint32_t ficadl_val, uint32_t ficadh_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
+ WREG32(data, ficaa_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
+ WREG32(data, ficadl_val);
+
+ WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
+ WREG32(data, ficadh_val);
+
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
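The FICA helpers above go through the PCIe index/data register pair under pcie_idx_lock: each access writes the target SMN address to the index register and then reads or writes the data register, and the 64-bit result is assembled from the Lo/Hi halves (the & 0xFFFFFFFFFFFFFFFF in the read path mainly serves to promote the 32-bit value before the shift). The standalone sketch below models that index/data access shape against a fake register file; all names are illustrative, and the real code additionally relies on the spinlock for atomicity.

    #include <stdio.h>
    #include <stdint.h>

    /* Toy index/data register pair: writing the "index" register selects
     * which internal register the "data" register accesses next.  Mirrors
     * the access shape of df_v3_6_get_fica()/df_v3_6_set_fica().
     */
    static uint32_t fake_regs[16];
    static uint32_t index_reg;

    static void wreg(int is_index, uint32_t val)
    {
        if (is_index)
            index_reg = val % 16;       /* select target register */
        else
            fake_regs[index_reg] = val; /* access selected register */
    }

    static uint32_t rreg(void)
    {
        return fake_regs[index_reg];
    }

    int main(void)
    {
        uint32_t lo, hi;
        uint64_t value;

        /* "write" the two halves through the data port */
        wreg(1, 2); wreg(0, 0xdeadbeef);   /* Lo half at index 2 */
        wreg(1, 3); wreg(0, 0x00000042);   /* Hi half at index 3 */

        /* read them back and combine, promoting before the shift */
        wreg(1, 2); lo = rreg();
        wreg(1, 3); hi = rreg();
        value = ((uint64_t)hi << 32) | lo;

        printf("64-bit value = 0x%016llx\n", (unsigned long long)value);
        return 0;
    }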
+
+/*
+ * df_v3_6_perfmon_rreg - read perfmon lo and hi
+ *
+ * Must be atomic: no MMIO method is provided, so the back-to-back reads of
+ * the lo and hi halves have to preserve the DF finite state machine.
+ */
+static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t *lo_val,
+ uint32_t hi_addr, uint32_t *hi_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ *lo_val = RREG32(data);
+ WREG32(address, hi_addr);
+ *hi_val = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/*
+ * df_v3_6_perfmon_wreg - write to perfmon lo and hi
+ *
+ * Must be atomic: no MMIO method is provided, so no reads may follow the
+ * data writes, in order to preserve the data fabric's finite state machine.
+ */
+static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
+ uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ WREG32(data, lo_val);
+ WREG32(address, hi_addr);
+ WREG32(data, hi_val);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+/* same as perfmon_wreg but return status on write value check */
+static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t lo_val,
+ uint32_t hi_addr, uint32_t hi_val)
+{
+ unsigned long flags, address, data;
+ uint32_t lo_val_rb, hi_val_rb;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, lo_addr);
+ WREG32(data, lo_val);
+ WREG32(address, hi_addr);
+ WREG32(data, hi_val);
+
+ WREG32(address, lo_addr);
+ lo_val_rb = RREG32(data);
+ WREG32(address, hi_addr);
+ hi_val_rb = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+ if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
+ return -EBUSY;
+
+ return 0;
+}
+
+
+/*
+ * retry arming counters every 100 usecs within 1 millisecond interval.
+ * if retry fails after time out, return error.
+ */
+#define ARM_RETRY_USEC_TIMEOUT 1000
+#define ARM_RETRY_USEC_INTERVAL 100
+static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
+ uint32_t lo_addr, uint32_t lo_val,
+ uint32_t hi_addr, uint32_t hi_val)
+{
+ int countdown = ARM_RETRY_USEC_TIMEOUT;
+
+ while (countdown) {
+
+ if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
+ hi_addr, hi_val))
+ break;
+
+ countdown -= ARM_RETRY_USEC_INTERVAL;
+ udelay(ARM_RETRY_USEC_INTERVAL);
+ }
+
+ return countdown > 0 ? 0 : -ETIME;
+}
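df_v3_6_perfmon_arm_with_retry() re-attempts the arm-with-readback helper every ARM_RETRY_USEC_INTERVAL microseconds until it succeeds or the 1 ms budget is exhausted. The standalone sketch below reproduces that poll-with-deadline pattern in ordinary userspace C, with usleep() standing in for udelay() and a deliberately flaky stand-in operation that succeeds on the third attempt; the names here are made up for illustration.

    #include <stdio.h>
    #include <unistd.h>
    #include <errno.h>

    #define RETRY_USEC_TIMEOUT  1000
    #define RETRY_USEC_INTERVAL 100

    /* Stand-in for the arm-with-status helper: pretends the write only
     * "sticks" on the third attempt (0 = success, -EBUSY = please retry).
     */
    static int flaky_arm(void)
    {
        static int attempts;

        return (++attempts >= 3) ? 0 : -EBUSY;
    }

    /* Same shape as df_v3_6_perfmon_arm_with_retry(): retry every interval
     * until success or until the countdown budget runs out.
     */
    static int arm_with_retry(void)
    {
        int countdown = RETRY_USEC_TIMEOUT;

        while (countdown) {
            if (!flaky_arm())
                return 0;

            countdown -= RETRY_USEC_INTERVAL;
            usleep(RETRY_USEC_INTERVAL);
        }

        return -ETIME;
    }

    int main(void)
    {
        printf("arm_with_retry() -> %d\n", arm_with_retry());
        return 0;
    }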
+
+/* get the number of df counters available */
+static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev;
+ struct drm_device *ddev;
+ int i, count;
+
+ ddev = dev_get_drvdata(dev);
+ adev = drm_to_adev(ddev);
+ count = 0;
+
+ for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
+ if (adev->df_perfmon_config_assign_mask[i] == 0)
+ count++;
+ }
+
+ return sysfs_emit(buf, "%i\n", count);
+}
+
+/* device attr for available perfmon counters */
+static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
+
+static void df_v3_6_query_hashes(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ adev->df.hash_status.hash_64k = false;
+ adev->df.hash_status.hash_2m = false;
+ adev->df.hash_status.hash_1g = false;
+
+ /* encoding for hash-enabled on Arcturus and Aldebaran */
+ if ((adev->asic_type == CHIP_ARCTURUS &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
+ (adev->asic_type == CHIP_ALDEBARAN &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
+ adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl64K);
+ adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl2M);
+ adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
+ DF_CS_UMC_AON0_DfGlobalCtrl,
+ GlbHashIntlvCtl1G);
+ }
+}
+
+/* init perfmons */
+static void df_v3_6_sw_init(struct amdgpu_device *adev)
+{
+ int i, ret;
+
+ ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail);
+ if (ret)
+ DRM_ERROR("failed to create file for available df counters\n");
+
+ for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
+ adev->df_perfmon_config_assign_mask[i] = 0;
+
+ df_v3_6_query_hashes(adev);
+}
+
+static void df_v3_6_sw_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+
+}
+
+static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+ tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+ } else
+ WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+ mmFabricConfigAccessControl_DEFAULT);
+}
+
+static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ if (adev->asic_type == CHIP_ALDEBARAN) {
+ tmp = RREG32_SOC15(DF, 0, mmDF_GCM_AON0_DramMegaBaseAddress0);
+ tmp &=
+ ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ } else {
+ tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
+ tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ }
+ tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+ return tmp;
+}
+
+static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+ int fb_channel_number;
+
+ fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
+ if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
+ fb_channel_number = 0;
+
+ return df_v3_6_channel_number[fb_channel_number];
+}
+
+static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
+ /* Put DF on broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, true);
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
+
+ /* Exit broadcast mode */
+ adev->df.funcs->enable_broadcast_mode(adev, false);
+ }
+}
+
+static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ u32 tmp;
+
+ /* AMD_CG_SUPPORT_DF_MGCG */
+ tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+ if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
+ *flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+/* get assigned df perfmon ctr as int */
+static bool df_v3_6_pmc_has_counter(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx)
+{
+
+ return ((config & 0x0FFFFFFUL) ==
+ adev->df_perfmon_config_assign_mask[counter_idx]);
+
+}
+
+/* get address based on counter assignment */
+static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx,
+ int is_ctrl,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr)
+{
+ if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
+ return;
+
+ switch (counter_idx) {
+
+ case 0:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
+ break;
+ case 1:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
+ break;
+ case 2:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
+ break;
+ case 3:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
+ break;
+
+ }
+
+}
+
+/* get read counter address */
+static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr)
+{
+ df_v3_6_pmc_get_addr(adev, config, counter_idx, 0, lo_base_addr,
+ hi_base_addr);
+}
+
+/* get control counter settings i.e. address and values to set */
+static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr,
+ uint32_t *lo_val,
+ uint32_t *hi_val,
+ bool is_enable)
+{
+
+ uint32_t eventsel, instance, unitmask;
+ uint32_t instance_10, instance_5432, instance_76;
+
+ df_v3_6_pmc_get_addr(adev, config, counter_idx, 1, lo_base_addr,
+ hi_base_addr);
+
+ if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
+ DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
+ *lo_base_addr, *hi_base_addr);
+ return -ENXIO;
+ }
+
+ eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
+ unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
+ instance = DF_V3_6_GET_INSTANCE(config);
+
+ instance_10 = instance & 0x3;
+ instance_5432 = (instance >> 2) & 0xf;
+ instance_76 = (instance >> 6) & 0x3;
+
+ *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
+ *lo_val = is_enable ? *lo_val | (1 << 22) : *lo_val & ~(1 << 22);
+ *hi_val = (instance_76 << 29) | instance_5432;
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+ config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);
+
+ return 0;
+}
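df_v3_6_pmc_get_ctrl_settings() splits the 64-bit perfmon config into event select, unit mask and instance fields (via the DF_V3_6_GET_* macros), then scatters the instance bits across the Lo/Hi control words together with the enable bit (bit 22 of the Lo word). The standalone sketch below reproduces only that bit arithmetic so the layout can be checked in isolation; the example config value is made up.

    #include <stdio.h>
    #include <stdint.h>

    /* Field extraction, mirroring the DF_V3_6_GET_* macros above. */
    #define GET_EVENT(x)    ((x) & 0xFFULL)
    #define GET_INSTANCE(x) (((x) >> 8) & 0xFFULL)
    #define GET_UNITMASK(x) (((x) >> 16) & 0xFFULL)

    static void build_ctrl_words(uint64_t config, int enable,
                                 uint32_t *lo_val, uint32_t *hi_val)
    {
        uint32_t eventsel = GET_EVENT(config) & 0x3f;
        uint32_t unitmask = GET_UNITMASK(config) & 0xf;
        uint32_t instance = GET_INSTANCE(config);

        uint32_t instance_10 = instance & 0x3;
        uint32_t instance_5432 = (instance >> 2) & 0xf;
        uint32_t instance_76 = (instance >> 6) & 0x3;

        /* Same packing as df_v3_6_pmc_get_ctrl_settings(). */
        *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
        *lo_val = enable ? (*lo_val | (1u << 22)) : (*lo_val & ~(1u << 22));
        *hi_val = (instance_76 << 29) | instance_5432;
    }

    int main(void)
    {
        uint32_t lo, hi;
        uint64_t config = 0xFA507ULL; /* unitmask=0xf, instance=0xa5, event=0x07 */

        build_ctrl_words(config, 1, &lo, &hi);
        printf("lo=0x%08x hi=0x%08x\n", lo, hi);
        return 0;
    }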
+
+/* add df performance counters for read */
+static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
+ uint64_t config)
+{
+ int i;
+
+ for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
+ if (adev->df_perfmon_config_assign_mask[i] == 0U) {
+ adev->df_perfmon_config_assign_mask[i] =
+ config & 0x0FFFFFFUL;
+ return i;
+ }
+ }
+
+ return -ENOSPC;
+}
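The counter bookkeeping above uses a simple convention: an entry of 0 in df_perfmon_config_assign_mask means the slot is free, otherwise the slot holds the low 24 bits of the owning config (which is also what df_v3_6_pmc_has_counter() compares against and what the df_cntr_avail sysfs attribute counts). The standalone sketch below mirrors that allocate/count convention; it is illustrative only, not the driver bookkeeping itself.

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_COUNTERS 4

    /* 0 means "free"; otherwise the slot holds the low 24 bits of the
     * perfmon config that owns it, as in df_v3_6_pmc_add_cntr().
     */
    static uint64_t assign_mask[MAX_COUNTERS];

    static int add_counter(uint64_t config)
    {
        int i;

        for (i = 0; i < MAX_COUNTERS; i++) {
            if (assign_mask[i] == 0) {
                assign_mask[i] = config & 0x0FFFFFFULL;
                return i;
            }
        }
        return -1;      /* no space left */
    }

    static int count_free(void)
    {
        int i, count = 0;

        for (i = 0; i < MAX_COUNTERS; i++)
            if (assign_mask[i] == 0)
                count++;
        return count;
    }

    int main(void)
    {
        printf("free before: %d\n", count_free());
        printf("config 0x123456 -> slot %d\n", add_counter(0x123456));
        printf("free after: %d\n", count_free());
        return 0;
    }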
+
+#define DEFERRED_ARM_MASK (1 << 31)
+static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
+ uint64_t config, int counter_idx,
+ bool is_deferred)
+{
+
+ if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
+ return -EINVAL;
+
+ if (is_deferred)
+ adev->df_perfmon_config_assign_mask[counter_idx] |=
+ DEFERRED_ARM_MASK;
+ else
+ adev->df_perfmon_config_assign_mask[counter_idx] &=
+ ~DEFERRED_ARM_MASK;
+
+ return 0;
+}
+
+static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx)
+{
+ return (df_v3_6_pmc_has_counter(adev, config, counter_idx) &&
+ (adev->df_perfmon_config_assign_mask[counter_idx]
+ & DEFERRED_ARM_MASK));
+
+}
+
+/* release performance counter */
+static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx)
+{
+ if (df_v3_6_pmc_has_counter(adev, config, counter_idx))
+ adev->df_perfmon_config_assign_mask[counter_idx] = 0ULL;
+}
+
+
+static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx)
+{
+ uint32_t lo_base_addr = 0, hi_base_addr = 0;
+
+ df_v3_6_pmc_get_read_settings(adev, config, counter_idx, &lo_base_addr,
+ &hi_base_addr);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+ df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
+}
+
+/* return the newly assigned counter index if is_add == 1; otherwise arm the counter and return 0 or an error status. */
+static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, int is_add)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ int err = 0, ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ if (is_add)
+ return df_v3_6_pmc_add_cntr(adev, config);
+
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ counter_idx,
+ &lo_base_addr,
+ &hi_base_addr,
+ &lo_val,
+ &hi_val,
+ true);
+
+ if (ret)
+ return ret;
+
+ err = df_v3_6_perfmon_arm_with_retry(adev,
+ lo_base_addr,
+ lo_val,
+ hi_base_addr,
+ hi_val);
+
+ if (err)
+ ret = df_v3_6_pmc_set_deferred(adev, config,
+ counter_idx, true);
+
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
+ int counter_idx, int is_remove)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ int ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ counter_idx,
+ &lo_base_addr,
+ &hi_base_addr,
+ &lo_val,
+ &hi_val,
+ false);
+
+ if (ret)
+ return ret;
+
+ df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
+ hi_base_addr, hi_val);
+
+ if (is_remove) {
+ df_v3_6_reset_perfmon_cntr(adev, config, counter_idx);
+ df_v3_6_pmc_release_cntr(adev, config, counter_idx);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
+ uint64_t config,
+ int counter_idx,
+ uint64_t *count)
+{
+ uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0;
+ *count = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ df_v3_6_pmc_get_read_settings(adev, config, counter_idx,
+ &lo_base_addr, &hi_base_addr);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+ /* rearm the counter or throw away count value on failure */
+ if (df_v3_6_pmc_is_deferred(adev, config, counter_idx)) {
+ int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
+ lo_base_addr, lo_val,
+ hi_base_addr, hi_val);
+
+ if (rearm_err)
+ return;
+
+ df_v3_6_pmc_set_deferred(adev, config, counter_idx,
+ false);
+ }
+
+ df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
+ hi_base_addr, &hi_val);
+
+ *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
+
+ if (*count >= DF_V3_6_PERFMON_OVERFLOW)
+ *count = 0;
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+ config, lo_base_addr, hi_base_addr, lo_val, hi_val);
+
+ break;
+ default:
+ break;
+ }
+}
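df_v3_6_pmc_get_count() assembles the 64-bit count from the Hi/Lo register halves and then discards any value at or above DF_V3_6_PERFMON_OVERFLOW, a 48-bit all-ones threshold. The short standalone sketch below reproduces that combine-and-filter step with made-up values.

    #include <stdio.h>
    #include <stdint.h>

    #define PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL  /* 48-bit all-ones threshold */

    /* Same combine-and-filter step as the tail of df_v3_6_pmc_get_count(). */
    static uint64_t combine_count(uint32_t hi_val, uint32_t lo_val)
    {
        uint64_t count = ((uint64_t)hi_val << 32) | lo_val;

        if (count >= PERFMON_OVERFLOW)
            count = 0;

        return count;
    }

    int main(void)
    {
        printf("normal:   %llu\n",
               (unsigned long long)combine_count(0x0000beef, 0x12345678));
        printf("overflow: %llu\n",
               (unsigned long long)combine_count(0x0000ffff, 0xffffffff));
        return 0;
    }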
+
+static bool df_v3_6_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t hw_assert_msklo, hw_assert_mskhi;
+ uint32_t v0, v1, v28, v31;
+
+ hw_assert_msklo = RREG32_SOC15(DF, 0,
+ mmDF_CS_UMC_AON0_HardwareAssertMaskLow);
+ hw_assert_mskhi = RREG32_SOC15(DF, 0,
+ mmDF_NCS_PG0_HardwareAssertMaskHigh);
+
+ v0 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk0);
+ v1 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk1);
+ v28 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk28);
+ v31 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk31);
+
+ if (v0 && v1 && v28 && v31)
+ return true;
+ else if (!v0 && !v1 && !v28 && !v31)
+ return false;
+ else {
+ dev_warn(adev->dev, "DF poison setting is inconsistent(%d:%d:%d:%d)!\n",
+ v0, v1, v28, v31);
+ return false;
+ }
+}
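df_v3_6_query_ras_poison_mode() treats the four hardware-assert mask bits as a consensus: poison mode is reported only when all four are set, all-clear means disabled, and any inconsistent mix is warned about and treated as disabled. The standalone sketch below reproduces that three-way check; the bit names follow the code above and the rest is illustrative.

    #include <stdio.h>
    #include <stdbool.h>

    /* Three-way consensus check, as in df_v3_6_query_ras_poison_mode():
     * all set -> poison mode on, all clear -> off, anything else -> warn
     * and fall back to off.
     */
    static bool poison_mode_from_bits(int v0, int v1, int v28, int v31)
    {
        if (v0 && v1 && v28 && v31)
            return true;
        if (!v0 && !v1 && !v28 && !v31)
            return false;

        fprintf(stderr, "DF poison setting is inconsistent(%d:%d:%d:%d)!\n",
                v0, v1, v28, v31);
        return false;
    }

    int main(void)
    {
        printf("all set   -> %d\n", poison_mode_from_bits(1, 1, 1, 1));
        printf("all clear -> %d\n", poison_mode_from_bits(0, 0, 0, 0));
        printf("mixed     -> %d\n", poison_mode_from_bits(1, 0, 1, 1));
        return 0;
    }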
+
+const struct amdgpu_df_funcs df_v3_6_funcs = {
+ .sw_init = df_v3_6_sw_init,
+ .sw_fini = df_v3_6_sw_fini,
+ .enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
+ .get_fb_channel_number = df_v3_6_get_fb_channel_number,
+ .get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
+ .update_medium_grain_clock_gating =
+ df_v3_6_update_medium_grain_clock_gating,
+ .get_clockgating_state = df_v3_6_get_clockgating_state,
+ .pmc_start = df_v3_6_pmc_start,
+ .pmc_stop = df_v3_6_pmc_stop,
+ .pmc_get_count = df_v3_6_pmc_get_count,
+ .get_fica = df_v3_6_get_fica,
+ .set_fica = df_v3_6_set_fica,
+ .query_ras_poison_mode = df_v3_6_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
new file mode 100644
index 000000000000..2505c7ef258a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V3_6_H__
+#define __DF_V3_6_H__
+
+#include "soc15_common.h"
+
+enum DF_V3_6_MGCG {
+ DF_V3_6_MGCG_DISABLE = 0,
+ DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1,
+ DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2,
+ DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13,
+ DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14,
+ DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
+};
+
+extern const struct attribute_group *df_v3_6_attr_groups[];
+extern const struct amdgpu_df_funcs df_v3_6_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
new file mode 100644
index 000000000000..2a573e33908b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_15.h"
+
+#include "df/df_4_15_offset.h"
+#include "df/df_4_15_sh_mask.h"
+
+static void df_v4_15_hw_init(struct amdgpu_device *adev)
+{
+ if (adev->have_atomics_support) {
+ uint32_t tmp;
+ uint32_t dis_lcl_proc = (1 << 1 |
+ 1 << 2 |
+ 1 << 13);
+
+ tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1);
+ tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT);
+ WREG32_SOC15(DF, 0, regNCSConfigurationRegister1, tmp);
+ }
+}
+
+const struct amdgpu_df_funcs df_v4_15_funcs = {
+ .hw_init = df_v4_15_hw_init
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.h b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h
new file mode 100644
index 000000000000..dddf2422112a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_15_H__
+#define __DF_V4_15_H__
+
+extern const struct amdgpu_df_funcs df_v4_15_funcs;
+
+#endif /* __DF_V4_15_H__ */
+
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_3.c b/drivers/gpu/drm/amd/amdgpu/df_v4_3.c
new file mode 100644
index 000000000000..e8b9e19ede2e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_3.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_3.h"
+
+#include "df/df_4_3_offset.h"
+#include "df/df_4_3_sh_mask.h"
+
+static bool df_v4_3_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ uint32_t hw_assert_msklo, hw_assert_mskhi;
+ uint32_t v0, v1, v28, v31;
+
+ hw_assert_msklo = RREG32_SOC15(DF, 0,
+ regDF_CS_UMC_AON0_HardwareAssertMaskLow);
+ hw_assert_mskhi = RREG32_SOC15(DF, 0,
+ regDF_NCS_PG0_HardwareAssertMaskHigh);
+
+ v0 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk0);
+ v1 = REG_GET_FIELD(hw_assert_msklo,
+ DF_CS_UMC_AON0_HardwareAssertMaskLow, HWAssertMsk1);
+ v28 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk28);
+ v31 = REG_GET_FIELD(hw_assert_mskhi,
+ DF_NCS_PG0_HardwareAssertMaskHigh, HWAssertMsk31);
+
+ if (v0 && v1 && v28 && v31)
+ return true;
+ else if (!v0 && !v1 && !v28 && !v31)
+ return false;
+ else {
+ dev_warn(adev->dev, "DF poison setting is inconsistent(%d:%d:%d:%d)!\n",
+ v0, v1, v28, v31);
+ return false;
+ }
+}
+
+const struct amdgpu_df_funcs df_v4_3_funcs = {
+ .query_ras_poison_mode = df_v4_3_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_3.h b/drivers/gpu/drm/amd/amdgpu/df_v4_3.h
new file mode 100644
index 000000000000..06ef0724edd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_3_H__
+#define __DF_V4_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_df_funcs df_v4_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
new file mode 100644
index 000000000000..a47960a0babd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_6_2.h"
+
+static bool df_v4_6_2_query_ras_poison_mode(struct amdgpu_device *adev)
+{
+ /* return true since related regs are inaccessible */
+ return true;
+}
+
+const struct amdgpu_df_funcs df_v4_6_2_funcs = {
+ .query_ras_poison_mode = df_v4_6_2_query_ras_poison_mode,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
new file mode 100644
index 000000000000..3bc3e6d216e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V4_6_2_H__
+#define __DF_V4_6_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_df_funcs df_v4_6_2_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c
new file mode 100644
index 000000000000..e9f177e9e3cf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "dimgrey_cavefish_ip_offset.h"
+
+int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev)
+{
+ /* HW has more IP blocks, only initialize the block needed by driver */
+ uint32_t i;
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN0_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/emu_soc.c b/drivers/gpu/drm/amd/amdgpu/emu_soc.c
new file mode 100644
index 000000000000..d72c25c1b987
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/emu_soc.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+
+int emu_soc_asic_init(struct amdgpu_device *adev)
+{
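+ /* No register programming is required when running on an emulation SoC. */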
+ return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
new file mode 100644
index 000000000000..d75b9940f248
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -0,0 +1,10249 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "nv.h"
+#include "nvd.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "smuio/smuio_11_0_0_offset.h"
+#include "smuio/smuio_11_0_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "clearstate_gfx10.h"
+#include "v10_structs.h"
+#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
+#include "nbio_v2_3.h"
+
+/*
+ * Navi10 has two graphics rings that share each graphics pipe:
+ * 1. Primary ring
+ * 2. Async ring
+ */
+#define GFX10_NUM_GFX_RINGS_NV1X 1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
+#define GFX10_MEC_HPD_SIZE 2048
+
+#define F32_CE_PROGRAM_RAM_SIZE 65536
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
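+/*
+ * Register offsets that are missing from, or differ in, the generic
+ * gc_10_1_0 headers on later gfx10 variants, so they are defined locally
+ * per ASIC.
+ */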
+#define mmCGTT_GS_NGG_CLK_CTRL 0x5087
+#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1
+#define mmCGTT_SPI_RA0_CLK_CTRL 0x507a
+#define mmCGTT_SPI_RA0_CLK_CTRL_BASE_IDX 1
+#define mmCGTT_SPI_RA1_CLK_CTRL 0x507b
+#define mmCGTT_SPI_RA1_CLK_CTRL_BASE_IDX 1
+
+#define GB_ADDR_CONFIG__NUM_PKRS__SHIFT 0x8
+#define GB_ADDR_CONFIG__NUM_PKRS_MASK 0x00000700L
+
+#define mmCGTS_TCC_DISABLE_gc_10_3 0x5006
+#define mmCGTS_TCC_DISABLE_gc_10_3_BASE_IDX 1
+#define mmCGTS_USER_TCC_DISABLE_gc_10_3 0x5007
+#define mmCGTS_USER_TCC_DISABLE_gc_10_3_BASE_IDX 1
+
+#define mmCP_MEC_CNTL_Sienna_Cichlid 0x0f55
+#define mmCP_MEC_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmRLC_SAFE_MODE_Sienna_Cichlid 0x4ca0
+#define mmRLC_SAFE_MODE_Sienna_Cichlid_BASE_IDX 1
+#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid 0x4ca1
+#define mmRLC_CP_SCHEDULERS_Sienna_Cichlid_BASE_IDX 1
+#define mmSPI_CONFIG_CNTL_Sienna_Cichlid 0x11ec
+#define mmSPI_CONFIG_CNTL_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid 0x0fc1
+#define mmVGT_ESGS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid 0x0fc2
+#define mmVGT_GSVS_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_RING_SIZE_Sienna_Cichlid 0x0fc3
+#define mmVGT_TF_RING_SIZE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid 0x0fc4
+#define mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid 0x0fc5
+#define mmVGT_TF_MEMORY_BASE_Sienna_Cichlid_BASE_IDX 0
+#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid 0x0fc6
+#define mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid_BASE_IDX 0
+#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid__SHIFT 0x1a
+#define GRBM_STATUS2__RLC_BUSY_Sienna_Cichlid_MASK 0x04000000L
+#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid_MASK 0x00000FFCL
+#define CP_RB_DOORBELL_RANGE_LOWER__DOORBELL_RANGE_LOWER_Sienna_Cichlid__SHIFT 0x2
+#define CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK 0x00000FFCL
+#define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580
+#define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0
+
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish 0x0105
+#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish 0x0106
+#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish_BASE_IDX 1
+
+#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1
+
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6 0x002d
+#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6_BASE_IDX 1
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6 0x002e
+#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6_BASE_IDX 1
+
+#define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441
+#define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1
+#define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261
+#define mmVGT_TF_MEMORY_BASE_HI_Vangogh_BASE_IDX 1
+#define mmVGT_HS_OFFCHIP_PARAM_Vangogh 0x224f
+#define mmVGT_HS_OFFCHIP_PARAM_Vangogh_BASE_IDX 1
+#define mmVGT_TF_RING_SIZE_Vangogh 0x224e
+#define mmVGT_TF_RING_SIZE_Vangogh_BASE_IDX 1
+#define mmVGT_GSVS_RING_SIZE_Vangogh 0x2241
+#define mmVGT_GSVS_RING_SIZE_Vangogh_BASE_IDX 1
+#define mmVGT_TF_MEMORY_BASE_Vangogh 0x2250
+#define mmVGT_TF_MEMORY_BASE_Vangogh_BASE_IDX 1
+#define mmVGT_ESGS_RING_SIZE_Vangogh 0x2240
+#define mmVGT_ESGS_RING_SIZE_Vangogh_BASE_IDX 1
+#define mmSPI_CONFIG_CNTL_Vangogh 0x2440
+#define mmSPI_CONFIG_CNTL_Vangogh_BASE_IDX 1
+#define mmGCR_GENERAL_CNTL_Vangogh 0x1580
+#define mmGCR_GENERAL_CNTL_Vangogh_BASE_IDX 0
+#define RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh 0x0000FFFFL
+
+#define mmCP_HYP_PFP_UCODE_ADDR 0x5814
+#define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_PFP_UCODE_DATA 0x5815
+#define mmCP_HYP_PFP_UCODE_DATA_BASE_IDX 1
+#define mmCP_HYP_CE_UCODE_ADDR 0x5818
+#define mmCP_HYP_CE_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_CE_UCODE_DATA 0x5819
+#define mmCP_HYP_CE_UCODE_DATA_BASE_IDX 1
+#define mmCP_HYP_ME_UCODE_ADDR 0x5816
+#define mmCP_HYP_ME_UCODE_ADDR_BASE_IDX 1
+#define mmCP_HYP_ME_UCODE_DATA 0x5817
+#define mmCP_HYP_ME_UCODE_DATA_BASE_IDX 1
+
+#define mmCPG_PSP_DEBUG 0x5c10
+#define mmCPG_PSP_DEBUG_BASE_IDX 1
+#define mmCPC_PSP_DEBUG 0x5c11
+#define mmCPC_PSP_DEBUG_BASE_IDX 1
+#define CPC_PSP_DEBUG__GPA_OVERRIDE_MASK 0x00000008L
+#define CPG_PSP_DEBUG__GPA_OVERRIDE_MASK 0x00000008L
+
+//CC_GC_SA_UNIT_DISABLE
+#define mmCC_GC_SA_UNIT_DISABLE 0x0fe9
+#define mmCC_GC_SA_UNIT_DISABLE_BASE_IDX 0
+#define CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT 0x8
+#define CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK 0x0000FF00L
+//GC_USER_SA_UNIT_DISABLE
+#define mmGC_USER_SA_UNIT_DISABLE 0x0fea
+#define mmGC_USER_SA_UNIT_DISABLE_BASE_IDX 0
+#define GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT 0x8
+#define GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK 0x0000FF00L
+//PA_SC_ENHANCE_3
+#define mmPA_SC_ENHANCE_3 0x1085
+#define mmPA_SC_ENHANCE_3_BASE_IDX 0
+#define PA_SC_ENHANCE_3__FORCE_PBB_WORKLOAD_MODE_TO_ZERO__SHIFT 0x3
+#define PA_SC_ENHANCE_3__FORCE_PBB_WORKLOAD_MODE_TO_ZERO_MASK 0x00000008L
+
+#define mmCGTT_SPI_CS_CLK_CTRL 0x507c
+#define mmCGTT_SPI_CS_CLK_CTRL_BASE_IDX 1
+
+#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid 0x16f3
+#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0
+#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid 0x15db
+#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0
+
+#define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030
+#define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0
+
+#define mmRLC_SPARE_INT_0_Sienna_Cichlid 0x4ca5
+#define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX 1
+
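+/* Firmware images the driver may request for each supported gfx10 ASIC. */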
+MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi10_me.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
+MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi14_me.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi12_me.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_ce.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_pfp.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_me.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_mec2.bin");
+MODULE_FIRMWARE("amdgpu/sienna_cichlid_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/navy_flounder_ce.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_me.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/vangogh_ce.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_me.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_mec.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vangogh_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ce.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_pfp.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_me.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin");
+MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/beige_goby_ce.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_pfp.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_me.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_mec2.bin");
+MODULE_FIRMWARE("amdgpu/beige_goby_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/yellow_carp_ce.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_pfp.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_me.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin");
+MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin");
+MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_6_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
+MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
+
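+/* GC status registers captured when dumping gfx v10 hardware state. */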
+static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
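+ /* The same offset is listed repeatedly: consecutive reads return successive entries of the CP header dump buffer. */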
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
+ /* gfx header registers */
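+ /* Each HEADER_DUMP offset is read repeatedly to step through that engine's header dump buffer. */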
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+};
+
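+/*
+ * Golden register settings: each entry gives the register, a mask of the
+ * bits to update and the value to program into those bits at init time.
+ */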
+static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = {
+ /* Pending emulation bring-up */
+};
+
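+/*
+ * RLC SPM sample-delay programming: each GRBM_GFX_INDEX write selects the
+ * target shader engine, then the following indirect address/data pair sets
+ * one sample-delay register.
+ */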
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x33),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
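
The table closed above follows a regular cadence: each sample-delay override is an indirect address/data pair (mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR selects the slot, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA supplies the delay), and every pair is preceded by an mmGRBM_GFX_INDEX write (0x0, then 0x10000) that reads as steering the indexed access to the first and then the second shader engine, with the final entry (mask 0xFFFFFFFF, value 0xe0000000) restoring broadcast mode. A golden-settings table of this shape is normally walked once at init time with masked read-modify-writes; the sketch below is a minimal stand-in for that loop under assumed semantics (and_mask selects the bits to override, or_mask supplies their new value), using hypothetical rd32()/wr32() MMIO accessors and a flattened register offset rather than the driver's actual helper.

/*
 * Minimal sketch of applying a golden-settings table.
 * Assumptions: rd32()/wr32() are hypothetical MMIO accessors and the
 * register field is already an absolute offset; this is not the
 * driver's own register-sequence helper.
 */
struct golden_reg {
	unsigned int reg;       /* absolute register offset */
	unsigned int and_mask;  /* bits selected for override */
	unsigned int or_mask;   /* replacement value for those bits */
};

extern unsigned int rd32(unsigned int reg);
extern void wr32(unsigned int reg, unsigned int val);

static void apply_golden_regs(const struct golden_reg *regs, unsigned int count)
{
	unsigned int i, val;

	for (i = 0; i < count; i++) {
		if (regs[i].and_mask == 0xffffffffu) {
			/* Full mask: the value replaces the register outright. */
			val = regs[i].or_mask;
		} else {
			/* Partial mask: read-modify-write only the masked field. */
			val = rd32(regs[i].reg);
			val &= ~regs[i].and_mask;
			val |= regs[i].or_mask & regs[i].and_mask;
		}
		wr32(regs[i].reg, val);
	}
}
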
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070105),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000)
+};
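
Each SOC15_REG_GOLDEN_VALUE() entry above carries a register, an and-mask selecting the bits the entry may touch, and the value those bits should take; at init time the driver walks such tables and applies them with a read-modify-write (a full 0xffffffff mask simply overwrites the register). The sketch below is a minimal, self-contained illustration of that masking pattern, assuming a fake_mmio[] array and an apply_golden_regs() helper invented here for the example; it is not the driver's real soc15_program_register_sequence() path, only the same bit logic.

/*
 * Illustrative sketch only: applies and_mask/or_value pairs like the
 * golden-register entries above. Bits covered by the mask are replaced,
 * bits outside it are preserved; a full mask overwrites the register.
 */
#include <stdint.h>
#include <stdio.h>

struct golden_reg {
	uint32_t offset;   /* register offset (index into fake_mmio here) */
	uint32_t and_mask; /* bits this entry is allowed to change */
	uint32_t or_value; /* value for the masked bits */
};

static uint32_t fake_mmio[16]; /* stand-in for the real register file */

static void apply_golden_regs(const struct golden_reg *regs, unsigned int count)
{
	for (unsigned int i = 0; i < count; i++) {
		uint32_t tmp;

		if (regs[i].and_mask == 0xffffffffu) {
			tmp = regs[i].or_value;          /* full overwrite */
		} else {
			tmp = fake_mmio[regs[i].offset]; /* read-modify-write */
			tmp &= ~regs[i].and_mask;
			tmp |= regs[i].or_value & regs[i].and_mask;
		}
		fake_mmio[regs[i].offset] = tmp;
	}
}

int main(void)
{
	static const struct golden_reg regs[] = {
		{ .offset = 2, .and_mask = 0x00400000, .or_value = 0x04440000 }, /* partial mask */
		{ .offset = 3, .and_mask = 0xffffffff, .or_value = 0xffff3101 }, /* full mask */
	};

	fake_mmio[2] = 0x12345678;
	apply_golden_regs(regs, 2);
	printf("reg2=0x%08x reg3=0x%08x\n", fake_mmio[2], fake_mmio[3]);
	return 0;
}

In the RLC SPM tables that follow, the repeated mmGRBM_GFX_INDEX writes appear to select which shader engine (or broadcast target) the next indirect SAMPLEDELAY write lands on, which is why every data entry is preceded by an index entry.
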
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = {
+ /* Pending on emulation bring up */
+};
+
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000L, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1a8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1ac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1b8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1bc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1cc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x26),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x25),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x3b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = {
+ /* Pending on emulation bring up */
+};
+
+static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000L, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x2),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x20),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x24),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x38),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x3c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x18),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x50),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x54),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x58),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x5c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x48),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x40),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x44),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1a),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x60),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x64),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x70),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x74),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x6c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x78),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x7c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x88),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x8c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x80),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x84),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x90),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x94),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x98),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x9c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xa8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xac),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xbc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xb4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xc8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xcc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xec),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x16),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xf8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xfc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x17),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x13),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xe0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x118),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x11c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x120),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x124),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xdc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x110),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x114),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x14),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x108),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x10c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x19),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0xd8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1b),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x128),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x12c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x138),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x13c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x12),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x134),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x140),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x144),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x150),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x154),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x148),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x14c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x7),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x158),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x15c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x168),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xa),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x16c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x9),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x160),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x164),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x170),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x174),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x180),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x184),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x178),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x17c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x18c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x5),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x198),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xc),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x19c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x190),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x194),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x30),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xd),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x34),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x11),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1d),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x2c),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_DATA, 0xFFFFFFFF, 0xb),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_SAMPLE_SKEW, 0x000000FF, 0x1f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLOBALS_MUXSEL_SKEW, 0x000000FF, 0x22),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x1),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_SAMPLE_SKEW, 0x000000FF, 0x6),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x10),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x10000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_SE_MUXSEL_SKEW, 0x000000FF, 0x15),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_DESER_START_SKEW, 0x000000FF, 0x35),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x10f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = {
+ /* Pending on emulation bring-up */
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffffffff, 0xff008080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffff8fff, 0xff008080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffffffff, 0x00070104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_START_PHASE, 0x000000ff, 0x00000004),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
+
+ /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on Navy Flounder. */
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000142),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
+
+ /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on VanGogh. */
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000280, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07800000, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x00001d00, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003c0000, 0x00280400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0x40000000, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00040000, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0x01000000, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0x0000001f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xb0000ff0, 0x30000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff000000, 0x7e000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xe07df47f, 0x00180070),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_FAST_CLKS, 0x3fffffff, 0x0000493e),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x3c000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0xa0000000, 0xa0000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x00008000, 0x003c8014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_DRAM_BURST_CTRL, 0x00000010, 0x00000017),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xd8d8d8d8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00009d00, 0x00008500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xffffffff, 0x000fffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_DRAM_BURST_CTRL, 0x00000010, 0x00000017),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xfcfcfcfc, 0xd8d8d8d8),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77707770, 0x21302130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77707770, 0x21302130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xfc02002f, 0x9402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00002188, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x08000009, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xcc3fcc03, 0x842a4c02),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000000f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffff3109, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x00030008, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf000003f, 0x01200007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000)
+};
+
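+/* Default SH_MEM_CONFIG: 64-bit address mode, unaligned alignment mode,
+ * retry mode ALL and an initial instruction prefetch level of 3.
+ */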
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+/* TODO: pending on the golden setting value of GB_ADDR_CONFIG */
+#define CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN 0x00100044
+
+static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev);
+static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
+static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
+static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
+static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev);
+static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev);
+static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev);
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid);
+
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
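+
+/* Emit a SET_RESOURCES packet on the KIQ ring to hand the compute queue
+ * mask and the cleaner shader address to the CP.
+ */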
+static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
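+/* Emit a MAP_QUEUES packet on the KIQ ring to map a ring's MQD onto the
+ * hardware queue selected by its me/pipe/queue and doorbell index.
+ */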
+static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
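+/* Emit an UNMAP_QUEUES packet on the KIQ ring; for PREEMPT_QUEUES_NO_UNMAP
+ * the trailing dwords carry the fence address and sequence to signal.
+ */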
+static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* action, queue_sel, engine_sel, num_queues */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
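+/* Emit a QUERY_STATUS packet on the KIQ ring for the given queue, passing
+ * the address and fence sequence used to report completion.
+ */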
+static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* doorbell offset, engine_sel */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
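+/* Reset a hung hardware queue: request an HQD dequeue for compute queues or
+ * a per-VMID CP reset for gfx queues, then poll until the HQD goes inactive.
+ */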
+static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+ uint32_t tmp;
+
+ /* enter safe mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait till dequeue takes effect */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp);
+
+ /* wait till dequeue takes effect */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx10_kiq_set_resources,
+ .kiq_map_queues = gfx10_kiq_map_queues,
+ .kiq_unmap_queues = gfx10_kiq_unmap_queues,
+ .kiq_query_status = gfx10_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v10_0_kiq_pm4_funcs;
+}
+
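+/* Program the RLC SPM (streaming performance monitor) golden register
+ * sequence for the detected GC IP version.
+ */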
+static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_0_nv10,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_0_nv10));
+ break;
+ case IP_VERSION(10, 1, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_1_nv14,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_nv14));
+ break;
+ case IP_VERSION(10, 1, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_rlc_spm_10_1_2_nv12,
+ (const u32)ARRAY_SIZE(golden_settings_gc_rlc_spm_10_1_2_nv12));
+ break;
+ default:
+ break;
+ }
+}
+
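+/* Program the per-ASIC golden register sequences based on the GC IP
+ * version; this is a no-op under SR-IOV.
+ */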
+static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_0_nv10,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
+ break;
+ case IP_VERSION(10, 1, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_nv14,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
+ break;
+ case IP_VERSION(10, 1, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_2,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1_2_nv12,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
+ break;
+ case IP_VERSION(10, 3, 0):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_sienna_cichlid,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_sienna_cichlid));
+ break;
+ case IP_VERSION(10, 3, 2):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_2,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_2));
+ break;
+ case IP_VERSION(10, 3, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_vangogh,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_vangogh));
+ break;
+ case IP_VERSION(10, 3, 3):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_3,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_3));
+ break;
+ case IP_VERSION(10, 3, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_4,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4));
+ break;
+ case IP_VERSION(10, 3, 5):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_5,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_5));
+ break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_0_cyan_skillfish,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish));
+ break;
+ case IP_VERSION(10, 3, 6):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_6,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_6));
+ break;
+ case IP_VERSION(10, 3, 7):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_3_7,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_3_7));
+ break;
+ default:
+ break;
+ }
+ gfx_v10_0_init_spm_golden_registers(adev);
+}
+
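+/* Emit a WRITE_DATA packet that writes 'val' to register 'reg', optionally
+ * requesting write confirmation.
+ */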
+static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
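+/* Emit a WAIT_REG_MEM packet that stalls the engine until the masked value
+ * at a register or memory location equals 'ref'.
+ */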
+static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
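+/* Basic ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring and
+ * poll the register until the value appears or the timeout expires.
+ */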
+static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned int i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ return r;
+}
+
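+/* IB test: submit an indirect buffer that writes 0xDEADBEEF to a writeback
+ * slot and wait on the fence to confirm the CP executed it.
+ */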
+static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned int index;
+ uint64_t gpu_addr;
+ uint32_t *cpu_ptr;
+ long r;
+
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+err2:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
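+/* Check whether the CP firmware is new enough to support register
+ * write-with-wait; warn once if it is not.
+ */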
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+ adev->gfx.cp_fw_write_wait = false;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ if ((adev->gfx.me_fw_version >= 0x00000046) &&
+ (adev->gfx.me_feature_version >= 27) &&
+ (adev->gfx.pfp_fw_version >= 0x00000068) &&
+ (adev->gfx.pfp_feature_version >= 27) &&
+ (adev->gfx.mec_fw_version >= 0x0000005b) &&
+ (adev->gfx.mec_feature_version >= 27))
+ adev->gfx.cp_fw_write_wait = true;
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cp_fw_write_wait = true;
+ break;
+ default:
+ break;
+ }
+
+ if (!adev->gfx.cp_fw_write_wait)
+ DRM_WARN_ONCE("CP firmware version too old, please update!");
+}
+
+static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
+{
+ bool ret = false;
+
+ switch (adev->pdev->revision) {
+ case 0xc2:
+ case 0xc3:
+ ret = true;
+ break;
+ default:
+ ret = false;
+ break;
+ }
+
+ return ret;
+}
+
+static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ break;
+ default:
+ break;
+ }
+}
+
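+/* Request and initialize the PFP, ME, CE, RLC and MEC microcode images for
+ * the detected GC IP version; MEC2 is optional.
+ */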
+static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
+{
+ char fw_name[53];
+ char ucode_prefix[30];
+ const char *wks = "";
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ DRM_DEBUG("\n");
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) &&
+ (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00)))
+ wks = "_wks";
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me%s.bin", ucode_prefix, wks);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce%s.bin", ucode_prefix, wks);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ /* Don't validate this firmware: there are apparently firmware
+ * binaries in the wild with an incorrect size in the header.
+ */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
+ goto out;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec%s.bin", ucode_prefix, wks);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
+ if (!err) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
+ } else {
+ err = 0;
+ adev->gfx.mec2_fw = NULL;
+ }
+
+ gfx_v10_0_check_fw_write_wait(adev);
+out:
+ if (err) {
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ amdgpu_ucode_release(&adev->gfx.mec2_fw);
+ }
+
+ gfx_v10_0_check_gfxoff_flag(adev);
+
+ return err;
+}
+
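+ /* Descriptive note (added): compute the clear-state buffer size in dwords from the gfx10 section table. */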
+static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+
+ /* set PA_SC_TILE_STEERING_OVERRIDE */
+ count += 3;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
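+ /*
+ * Descriptive note (added): build the clear-state buffer from the common
+ * preamble, the parsed context sections and the PA_SC_TILE_STEERING_OVERRIDE
+ * context register.
+ */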
+static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+{
+ u32 count = 0;
+ int ctx_reg_offset;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
+
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ buffer[count++] = cpu_to_le32(ctx_reg_offset);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_gfx_csb_preamble_end(buffer, count);
+}
+
+static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
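+ /*
+ * Descriptive note (added): record the scratch and GRBM register offsets
+ * used for RLCG indirect register access; Sienna Cichlid (GC 10.3.0) uses
+ * a different spare interrupt register.
+ */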
+static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ reg_access_ctrl->spare_int =
+ SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid);
+ break;
+ default:
+ reg_access_ctrl->spare_int =
+ SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
+ break;
+ }
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx10_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+static void gfx_v10_0_me_init(struct amdgpu_device *adev)
+{
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
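+ /*
+ * Descriptive note (added): allocate the MEC HPD EOP buffer and, for
+ * direct firmware loading, a GTT buffer holding the MEC microcode.
+ */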
+static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ const __le32 *fw_data = NULL;
+ unsigned int fw_size;
+ u32 *fw = NULL;
+ size_t mec_hpd_size;
+
+ const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
+
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ }
+
+ return 0;
+}
+
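+ /* Descriptive note (added): wave debug helpers that read SQ indirect registers via SQ_IND_INDEX/SQ_IND_DATA. */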
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
+}
+
+static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* in gfx10 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here
+ */
+ WARN_ON(simd != 0);
+
+ /* type 2 wave data */
+ dst[(*no_fields)++] = 2;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ WARN_ON(simd != 0);
+
+ wave_read_regs(
+ adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+ dst);
+}
+
+static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ nv_grbm_select(adev, me, pipe, q, vm);
+}
+
+static void gfx_v10_0_update_perfmon_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ data = def = RREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL);
+
+ if (enable)
+ data |= RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK;
+ else
+ data &= ~RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK;
+
+ if (data != def)
+ WREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL, data);
+}
+
+static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v10_0_select_se_sh,
+ .read_wave_data = &gfx_v10_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v10_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q,
+ .init_spm_golden = &gfx_v10_0_init_spm_golden_registers,
+ .update_perfmon_mgcg = &gfx_v10_0_update_perfmon_mgcg,
+};
+
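+ /*
+ * Descriptive note (added): derive the gfx configuration (hw contexts,
+ * FIFO sizes and the GB_ADDR_CONFIG derived fields) for the detected
+ * GC IP version.
+ */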
+static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+ break;
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = CYAN_SKILLFISH_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
+}
+
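+ /* Descriptive note (added): set up one kernel gfx ring: doorbell, VM hub, EOP interrupt type and hardware priority. */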
+static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+ int me, int pipe, int queue)
+{
+ struct amdgpu_ring *ring;
+ unsigned int irq_type;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.gfx_ring[ring_id];
+
+ ring->me = me;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ if (!ring_id)
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ unsigned int irq_type;
+ struct amdgpu_ring *ring;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX10_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
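+ /*
+ * Descriptive note (added): allocate the buffers used to snapshot core,
+ * compute-queue and gfx-queue registers for IP dumps.
+ */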
+static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
+}
+
+static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id = 0;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_queue_per_pipe = 2;
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 101 &&
+ adev->gfx.pfp_fw_version >= 158 &&
+ adev->gfx.mec_fw_version >= 151) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 14 &&
+ adev->gfx.pfp_fw_version >= 17 &&
+ adev->gfx.mec_fw_version >= 24) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 9 &&
+ adev->gfx.mec_fw_version >= 12) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ /* KIQ event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
+ &adev->gfx.kiq[0].irq);
+ if (r)
+ return r;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v10_0_me_init(adev);
+
+ if (adev->gfx.rlc.funcs) {
+ if (adev->gfx.rlc.funcs->init) {
+ r = adev->gfx.rlc.funcs->init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+ }
+ }
+
+ r = gfx_v10_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v10_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v10_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
+
+ gfx_v10_0_gpu_early_init(adev);
+
+ gfx_v10_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+}
+
+static void gfx_v10_0_ce_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj,
+ &adev->gfx.ce.ce_fw_gpu_addr,
+ (void **)&adev->gfx.ce.ce_fw_ptr);
+}
+
+static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+}
+
+static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
+ gfx_v10_0_pfp_fini(adev);
+ gfx_v10_0_ce_fini(adev);
+ gfx_v10_0_me_fini(adev);
+ gfx_v10_0_rlc_fini(adev);
+ gfx_v10_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
+
+ gfx_v10_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
+ return 0;
+}
+
+static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
+ data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
+
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+ data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
+
+ return (~data) & mask;
+}
+
+static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
+{
+ int i, j;
+ u32 data;
+ u32 active_rbs = 0;
+ u32 bitmap;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6))) &&
+ ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
+ continue;
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ data = gfx_v10_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
+ }
+ }
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
+}
+
+static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev)
+{
+ uint32_t num_sc;
+ uint32_t enabled_rb_per_sh;
+ uint32_t active_rb_bitmap;
+ uint32_t num_rb_per_sc;
+ uint32_t num_packer_per_sc;
+ uint32_t pa_sc_tile_steering_override;
+
+ /* for ASICs that integrate GFX v10.3,
+ * pa_sc_tile_steering_override should be set to 0
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+ return 0;
+
+ /* init num_sc */
+ num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.num_sc_per_sh;
+ /* init num_rb_per_sc */
+ active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev);
+ enabled_rb_per_sh = hweight32(active_rb_bitmap);
+ num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh;
+ /* init num_packer_per_sc */
+ num_packer_per_sc = adev->gfx.config.num_packer_per_sc;
+
+ pa_sc_tile_steering_override = 0;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK;
+
+ return pa_sc_tile_steering_override;
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+
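+ /* Descriptive note (added): enable SPI debug traps and clear the trap data registers for the given VMID range. */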
+static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,
+ uint32_t first_vmid,
+ uint32_t last_vmid)
+{
+ uint32_t data;
+ uint32_t trap_config_vmid_mask = 0;
+ int i;
+
+ /* Calculate trap config vmid mask */
+ for (i = first_vmid; i < last_vmid; i++)
+ trap_config_vmid_mask |= (1 << i);
+
+ data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+ VMID_SEL, trap_config_vmid_mask);
+ data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+ TRAP_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
+static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ nv_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
+ }
+
+ gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
+ AMDGPU_NUM_VMID);
+}
+
+static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+
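+ /*
+ * Descriptive note (added): for each SA, disable GCRD targets and UTCL0
+ * invalidation requests for the TCP/SQC blocks belonging to inactive WGPs.
+ */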
+static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ int i, j, k;
+ int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
+ u32 tmp, wgp_active_bitmap = 0;
+ u32 gcrd_targets_disable_tcp = 0;
+ u32 utcl_invreq_disable = 0;
+ /*
+ * GCRD_TARGETS_DISABLE field contains
+ * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
+ * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
+ */
+ u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
+ 2 * max_wgp_per_sh + /* TCP */
+ max_wgp_per_sh + /* SQC */
+ 4); /* GL1C */
+ /*
+ * UTCL1_UTCL0_INVREQ_DISABLE field contains
+ * for Navi10/Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
+ * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
+ */
+ u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
+ 2 * max_wgp_per_sh + /* TCP */
+ 2 * max_wgp_per_sh + /* SQC */
+ 4 + /* RMI */
+ 1); /* SQG */
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+ /*
+ * Set corresponding TCP bits for the inactive WGPs in
+ * GCRD_SA_TARGETS_DISABLE
+ */
+ gcrd_targets_disable_tcp = 0;
+ /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
+ utcl_invreq_disable = 0;
+
+ for (k = 0; k < max_wgp_per_sh; k++) {
+ if (!(wgp_active_bitmap & (1 << k))) {
+ gcrd_targets_disable_tcp |= 3 << (2 * k);
+ gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2));
+ utcl_invreq_disable |= (3 << (2 * k)) |
+ (3 << (2 * (max_wgp_per_sh + k)));
+ }
+ }
+
+ tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= (0xffffffffU << (4 * max_wgp_per_sh));
+ tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
+ WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= (0xffffffffU << (3 * max_wgp_per_sh));
+ tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
+ WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
+ }
+ }
+
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
+{
+ /* TCCs are global (not instanced). */
+ uint32_t tcc_disable;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) {
+ tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_gc_10_3) |
+ RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_gc_10_3);
+ } else {
+ tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) |
+ RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE);
+ }
+
+ adev->gfx.config.tcc_disabled_mask =
+ REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
+ (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
+}
+
+static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v10_0_setup_rb(adev);
+ gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v10_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override =
+ gfx_v10_0_init_pa_sc_tile_steering_override(adev);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ nv_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v10_0_init_compute_vmid(adev);
+ gfx_v10_0_init_gds_vmid(adev);
+
+}
+
+static u32 gfx_v10_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v10_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
+static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
+}
+
+static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ /* csib */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+ } else {
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+ }
+ return 0;
+}
+
+static void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
+}
+
+static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t rlc_pg_cntl;
+
+ rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);
+
+ if (!enable) {
+ /* RLC_PG_CNTL[23] = 0 (default)
+ * RLC will wait for handshake acks with SMU
+ * GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ * RLC will not issue any message to SMU
+ * hence no handshake between SMU & RLC
+ * GFXOFF will be disabled
+ */
+ rlc_pg_cntl |= 0x800000;
+ } else
+ rlc_pg_cntl &= ~0x800000;
+ WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
+}
+
+static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
+{
+ /*
+ * TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected
+ */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v10_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32_SOC15(GC, 0, mmRLC_SRM_CNTL);
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, mmRLC_SRM_CNTL, tmp);
+}
+
+static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned int i, fw_size;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->psp.autoload_supported) {
+
+ r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
+ if (r)
+ return r;
+
+ gfx_v10_0_init_csb(adev);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v10_0_rlc_enable_srm(adev);
+ } else {
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v10_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v10_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ /* rlc backdoor autoload firmware */
+ r = gfx_v10_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v10_0_init_csb(adev);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, 0xf);
+
+ adev->gfx.rlc.funcs->start(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
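+ /*
+ * Descriptive note (added): layout of each firmware image inside the RLC
+ * backdoor autoload buffer, filled in from the PSP TOC by
+ * gfx_v10_0_parse_rlc_toc().
+ */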
+static struct {
+ FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+} rlc_autoload_info[FIRMWARE_ID_MAX];
+
+static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
+{
+ int ret;
+ RLC_TABLE_OF_CONTENT *rlc_toc;
+
+ ret = amdgpu_bo_create_reserved(adev, adev->psp.toc.size_bytes, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_toc_bo,
+ &adev->gfx.rlc.rlc_toc_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_toc_buf);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret);
+ return ret;
+ }
+
+ /* Copy toc from psp sos fw to rlc toc buffer */
+ memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc.start_addr, adev->psp.toc.size_bytes);
+
+ rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf;
+ while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) &&
+ (rlc_toc->id < FIRMWARE_ID_MAX)) {
+ if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) &&
+ (rlc_toc->id <= FIRMWARE_ID_CP_MES)) {
+ /* Offset needs 4KB alignment */
+ rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE);
+ }
+
+ rlc_autoload_info[rlc_toc->id].id = rlc_toc->id;
+ rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4;
+ rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;
+
+ rlc_toc++;
+ }
+
+ return 0;
+}
+
+static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+ uint32_t total_size = 0;
+ FIRMWARE_ID id;
+ int ret;
+
+ ret = gfx_v10_0_parse_rlc_toc(adev);
+ if (ret) {
+ dev_err(adev->dev, "failed to parse rlc toc\n");
+ return 0;
+ }
+
+ for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++)
+ total_size += rlc_autoload_info[id].size;
+
+ /* The toc offsets may be page-aligned, so make sure the buffer still covers the last entry */
+ if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset)
+ total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset +
+ rlc_autoload_info[FIRMWARE_ID_MAX-1].size;
+
+ return total_size;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t total_size;
+
+ total_size = gfx_v10_0_calc_toc_total_size(adev);
+
+ r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo,
+ &adev->gfx.rlc.rlc_toc_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_toc_buf);
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
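+ /*
+ * Descriptive note (added): copy one firmware image into its TOC-assigned
+ * slot of the autoload buffer, zero-padding any unused space in the slot.
+ */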
+static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ FIRMWARE_ID id,
+ const void *fw_data,
+ uint32_t fw_size)
+{
+ uint32_t toc_offset;
+ uint32_t toc_fw_size;
+ char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+
+ if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX)
+ return;
+
+ toc_offset = rlc_autoload_info[id].offset;
+ toc_fw_size = rlc_autoload_info[id].size;
+
+ if (fw_size == 0)
+ fw_size = toc_fw_size;
+
+ if (fw_size > toc_fw_size)
+ fw_size = toc_fw_size;
+
+ memcpy(ptr + toc_offset, fw_data, fw_size);
+
+ if (fw_size < toc_fw_size)
+ memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
+{
+ void *data;
+ uint32_t size;
+
+ data = adev->gfx.rlc.rlc_toc_buf;
+ size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size;
+
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_RLC_TOC,
+ data, size);
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+
+ /* pfp ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_PFP,
+ fw_data, fw_size);
+
+ /* ce ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_CE,
+ fw_data, fw_size);
+
+ /* me ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_ME,
+ fw_data, fw_size);
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size);
+
+ /* mec1 ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ cp_hdr->jt_size * 4;
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_MEC,
+ fw_data, fw_size);
+ /* mec2 ucode is not necessary if mec2 ucode is the same as mec1 */
+}
+
+/* Temporarily put sdma part here */
+static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v1_0 *sdma_hdr;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma_hdr = (const struct sdma_firmware_header_v1_0 *)
+ adev->sdma.instance[i].fw->data;
+ fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes);
+
+ if (i == 0) {
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA0_JT,
+ (uint32_t *)fw_data +
+ sdma_hdr->jt_offset,
+ sdma_hdr->jt_size * 4);
+ } else if (i == 1) {
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA1_JT,
+ (uint32_t *)fw_data +
+ sdma_hdr->jt_offset,
+ sdma_hdr->jt_size * 4);
+ }
+ }
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size, tmp;
+ uint64_t gpu_addr;
+
+ gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
+ gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
+ gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
+
+ rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size;
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
+
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size);
+
+ tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR);
+ if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK |
+ RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) {
+ DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n");
+ return -EINVAL;
+ }
+
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+ if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) {
+ DRM_ERROR("RLC ROM should halt itself\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
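+ /*
+ * Descriptive note (added): the *_config_*_cache() helpers below invalidate
+ * the engine's L1 instruction cache and point its base address at the
+ * corresponding firmware image inside the autoload buffer.
+ */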
+static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program me ucode address into instruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_ME].offset;
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program ce ucode address into instruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_CE].offset;
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program pfp ucode address into instruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset;
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program mec1 ucode address into instruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset;
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+ uint32_t cp_status;
+ uint32_t bootload_status;
+ int i, r;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ cp_status = RREG32_SOC15(GC, 0, mmCP_STAT);
+ bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS);
+ if ((cp_status == 0) &&
+ (REG_GET_FIELD(bootload_status,
+ RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
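+ /* Descriptive note (added): halt or un-halt the gfx CP (ME/PFP/CE) and wait for CP_STAT to report idle. */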
+static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
+ WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
+ else
+ WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
+
+ if (amdgpu_in_reset(adev) && !enable)
+ return 0;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
+}
+
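+ /*
+ * Descriptive note (added): the cp_gfx_load_*_microcode() helpers copy the
+ * PFP/CE/ME microcode into a GTT buffer, invalidate and re-point the
+ * engine's instruction cache, and write the jump table through the
+ * CP_HYP_*_UCODE registers.
+ */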
+static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const __le32 *fw_data;
+ unsigned int i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
+ gfx_v10_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
+ adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, 0);
+
+ for (i = 0; i < pfp_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_DATA,
+ le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *ce_hdr;
+ const __le32 *fw_data;
+ unsigned int i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ ce_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
+ le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.ce.ce_fw_obj,
+ &adev->gfx.ce.ce_fw_gpu_addr,
+ (void **)&adev->gfx.ce.ce_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create ce fw bo\n", r);
+ gfx_v10_0_ce_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ if (REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE) == 1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
+ adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
+ upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, 0);
+
+ for (i = 0; i < ce_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_DATA,
+ le32_to_cpup(fw_data + ce_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned int i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
+ gfx_v10_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ if (REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE) == 1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
+ adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, 0);
+
+ for (i = 0; i < me_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_DATA,
+ le32_to_cpup(fw_data + me_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
+ return -EINVAL;
+
+ gfx_v10_0_cp_gfx_enable(adev, false);
+
+ r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
+ return r;
+ }
+
+ r = gfx_v10_0_cp_gfx_load_ce_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load ce fw\n", r);
+ return r;
+ }
+
+ r = gfx_v10_0_cp_gfx_load_me_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load me fw\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+ int ctx_reg_offset;
+
+ /* init the CP */
+ WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
+ adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
+
+ gfx_v10_0_cp_gfx_enable(adev, true);
+
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
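+ /* emit the clear-state context register values from gfx10_cs_data */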
+ for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring, ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ amdgpu_ring_write(ring, ctx_reg_offset);
+ amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+ amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+ amdgpu_ring_write(ring, 0x8000);
+ amdgpu_ring_write(ring, 0x8000);
+
+ amdgpu_ring_commit(ring);
+
+ /* submit a clear-state packet to copy state 0 to the next available state */
+ if (adev->gfx.num_gfx_rings > 1) {
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+ }
+ return 0;
+}
+
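+/* select the gfx pipe that subsequent per-pipe CP register accesses target */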
+static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+ CP_PIPE_ID pipe)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp);
+}
+
+static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ u32 tmp;
+
+ if (!amdgpu_async_gfx_ring) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
+ }
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_Sienna_Cichlid_MASK);
+ break;
+ default:
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+ break;
+ }
+}
+
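+/*
+ * Program the ring-buffer registers (size, read/write pointer addresses,
+ * base and doorbell) for up to two gfx rings, then start the CP via
+ * gfx_v10_0_cp_gfx_start().
+ */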
+static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
+#endif
+ WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
+
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Init gfx ring 1 for pipe 1 */
+ if (adev->gfx.num_gfx_rings > 1) {
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
+
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ /* Switch to pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v10_0_cp_gfx_start(adev);
+
+ return 0;
+}
+
+static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
+ break;
+ }
+ } else {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ break;
+ }
+ adev->gfx.kiq[0].ring.sched.ready = false;
+ }
+ udelay(50);
+}
+
+static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned int i;
+ u32 tmp;
+ u32 usec_timeout = 50000; /* Wait for 50 ms */
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v10_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ if (REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE) == 1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr &
+ 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ /* MEC1 */
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0);
+
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
+
+ /*
+ * TODO: Loading MEC2 firmware is only necessary if MEC2 should run
+ * different microcode than MEC1.
+ */
+
+ return 0;
+}
+
+static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell the RLC which queue is the KIQ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
+ break;
+ default:
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
+ break;
+ }
+}
+
+static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v10_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ bool priority = false;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
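+/*
+ * Fill the gfx MQD (memory queue descriptor) from the queue properties:
+ * MQD base, ring base and size, rptr/wptr addresses, doorbell and
+ * priority.
+ */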
+static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_gfx_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0;
+
+ /* set up gfx queue priority */
+ gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
+
+ /* set up time quantum */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up gfx hqd base. this is similar to CP_RB_BASE */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
+
+ /* activate the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ return 0;
+}
+
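+/*
+ * Initialize a kernel gfx queue: on first init build the MQD and save a
+ * backup copy; on reset/resume restore the MQD from the backup and clear
+ * the ring.
+ */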
+static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+
+ /*
+ * If there are 2 gfx rings, set the lower doorbell
+ * range for the first ring here, otherwise the range of
+ * the second ring will override the first ring's.
+ */
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1)
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
+ if (r)
+ return r;
+
+ return gfx_v10_0_cp_gfx_start(adev);
+}
+
+static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v10_compute_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ eop_base_addr = prop->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(prop->queue_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a compute queue/ring */
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+ mqd->cp_hqd_active = prop->hqd_active;
+
+ return 0;
+}
+
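+/*
+ * Deactivate the queue if it is active, then program the HQD registers
+ * for the KIQ directly from its MQD and activate it.
+ */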
+static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* deactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* disable doorbells */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+
+ gfx_v10_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v10_0_kiq_init_register(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ gfx_v10_0_kiq_init_register(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
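+/*
+ * Initialize a kernel compute queue: on first init build the MQD and
+ * save a backup copy; on reset/resume restore the MQD from the backup
+ * and reset the ring.
+ */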
+static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!restore && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
+{
+ gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
+ return 0;
+}
+
+static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ gfx_v10_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i],
+ false);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_gfx_enable_kcq(adev, 0);
+}
+
+static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v10_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v10_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v10_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v10_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v10_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v10_0_cp_gfx_enable(adev, enable);
+ gfx_v10_0_cp_compute_enable(adev, enable);
+}
+
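+/*
+ * Probe whether the GRBM CAM remapping is already in place, typically by
+ * writing a test pattern through the VGT_ESGS_RING_SIZE_UMD alias and
+ * reading it back from the remapped register.
+ */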
+static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
+{
+ uint32_t data, pattern = 0xDEADBEEF;
+
+ /*
+ * check if mmVGT_ESGS_RING_SIZE_UMD
+ * has been remapped to mmVGT_ESGS_RING_SIZE
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
+
+ if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
+ return true;
+ }
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data);
+ break;
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ return true;
+ default:
+ data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
+
+ if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
+ return true;
+ }
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
+ break;
+ }
+
+ return false;
+}
+
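+/*
+ * Program the GRBM CAM so that accesses to the UMD alias registers are
+ * remapped to the corresponding privileged registers.
+ */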
+static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * Initialize cam_index to 0;
+ * the index auto-increments after each data write.
+ */
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
+ data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_Sienna_Cichlid) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ break;
+ default:
+ /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
+ data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+}
+
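+/*
+ * Set the GPA override bits in the CPC/CPG PSP debug registers to
+ * disable GPA mode.
+ */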
+static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data);
+
+ data = RREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG);
+ data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data);
+}
+
+static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!amdgpu_emu_mode)
+ gfx_v10_0_init_golden_registers(adev);
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /*
+ * For gfx 10, RLC firmware loading relies on the SMU firmware being
+ * loaded first, so for the direct load type the SMC ucode has to be
+ * loaded here before the RLC.
+ */
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ gfx_v10_0_disable_gpa_mode(adev);
+ }
+
+ /* if GRBM CAM not remapped, set up the remapping */
+ if (!gfx_v10_0_check_grbm_cam_remapping(adev))
+ gfx_v10_0_setup_grbm_cam_remapping(adev);
+
+ gfx_v10_0_constants_init(adev);
+
+ r = gfx_v10_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * Golden register init and RLC resume may override some registers,
+ * so reconfigure them here.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 10) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2))
+ gfx_v10_0_tcp_harvest(adev);
+
+ r = gfx_v10_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
+ gfx_v10_3_program_pbb_mode(adev);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev))
+ gfx_v10_3_set_power_brake_sequence(adev);
+
+ return r;
+}
+
+static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+
+ /* Workaround for the Vangogh ASIC to fix an SMU suspend failure:
+ * power gating needs to be set again during gfxoff control,
+ * otherwise disallowing gfxoff fails to take effect.
+ */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
+
+ if (!adev->no_hw_access) {
+ if (amdgpu_async_gfx_ring) {
+ if (amdgpu_gfx_disable_kgq(adev, 0))
+ DRM_ERROR("KGQ disable failed\n");
+ }
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
+ DRM_ERROR("KCQ disable failed\n");
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v10_0_cp_gfx_enable(adev, false);
+ /* Skip the steps that clear the KIQ position;
+ * they cause a GFX hang when another Windows guest is rendering.
+ */
+ return 0;
+ }
+ gfx_v10_0_cp_enable(adev, false);
+ gfx_v10_0_enable_gui_idle_interrupt(adev, false);
+
+ return 0;
+}
+
+static int gfx_v10_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v10_0_hw_fini(ip_block);
+}
+
+static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v10_0_hw_init(ip_block);
+}
+
+static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned int i;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
+ GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
+ GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP,
+ 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX,
+ 1);
+ }
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP,
+ 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET,
+ SOFT_RESET_RLC,
+ 1);
+ break;
+ default:
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET,
+ SOFT_RESET_RLC,
+ 1);
+ break;
+ }
+
+ if (grbm_soft_reset) {
+ /* stop the rlc */
+ gfx_v10_0_rlc_stop(adev);
+
+ /* Disable GFX parsing/prefetching */
+ gfx_v10_0_cp_gfx_enable(adev, false);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v10_0_cp_compute_enable(adev, false);
+
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
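+/*
+ * Read the 64-bit GOLDEN_TSC counter from SMUIO; the upper half is read
+ * twice to detect a carry between the lower and upper reads.
+ */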
+static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock, clock_lo, clock_hi, hi_check;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish);
+ /* The SMUIO TSC clock frequency is 100MHz, so the 32-bit counter
+ * carries over roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+ /* The SMUIO TSC clock frequency is 100MHz, so the 32-bit counter
+ * carries over roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ case IP_VERSION(10, 3, 6):
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6);
+ /* The SMUIO TSC clock frequency is 100MHz, so the 32-bit counter
+ * carries over roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ default:
+ preempt_disable();
+ clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
+ hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER);
+ /* The SMUIO TSC clock frequency is 100MHz, so the 32-bit counter
+ * carries over roughly every 42 seconds.
+ */
+ if (hi_check != clock_hi) {
+ clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER);
+ clock_hi = hi_check;
+ }
+ preempt_enable();
+ clock = clock_lo | (clock_hi << 32ULL);
+ break;
+ }
+ return clock;
+}
+
+static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;
+ break;
+ default:
+ break;
+ }
+
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+
+ gfx_v10_0_set_kiq_pm4_funcs(adev);
+ gfx_v10_0_set_ring_funcs(adev);
+ gfx_v10_0_set_irq_funcs(adev);
+ gfx_v10_0_set_gds_init(adev);
+ gfx_v10_0_set_rlc_funcs(adev);
+ gfx_v10_0_set_mqd_funcs(adev);
+
+ /* init rlcg reg access ctrl */
+ gfx_v10_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v10_0_init_microcode(adev);
+}
+
+static int gfx_v10_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_cntl;
+
+ /* report whether the RLC is currently enabled */
+ rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+ return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
+}
+
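+/*
+ * Request RLC safe mode by writing the CMD/MESSAGE handshake to
+ * RLC_SAFE_MODE and waiting for the RLC to clear the CMD field.
+ */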
+static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned int i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+ break;
+ }
+}
+
+static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);
+ break;
+ default:
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+ break;
+ }
+}
+
+static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 0 - Disable some blocks' MGCG */
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ WREG32_SOC15(GC, 0, mmCGTT_WD_CLK_CTRL, 0xff000000);
+ WREG32_SOC15(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xff000000);
+ WREG32_SOC15(GC, 0, mmCGTT_IA_CLK_CTRL, 0xff000000);
+
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+ }
+ } else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in RLC */
+ data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
+ }
+
+ }
+}
+
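+ /*
+ * Enable/disable coarse grain clock gating for the GFX 3D pipe by
+ * programming the RLC_CGCG_CGLS_CTRL_3D FSM and its override bits.
+ */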
+static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ /* Enable 3D CGCG/CGLS */
+ if (enable) {
+ /* write cmd to clear the cgcg/cgls override */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable 3D CGCG FSM (0x0000363f) */
+ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+ data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ /* Disable CGCG/CGLS */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+
+ /* disable cgcg, cgls should be disabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
+ }
+}
+
+static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+ data = 0;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+
+ /* reset CGCG/CGLS bits */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
+ }
+}
+
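+ /*
+ * Enable/disable fine grain clock gating (FGCG) by toggling the FGCG and
+ * RLC SRAM clock gater overrides.
+ */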
+static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ /* unset FGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ /* update FGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL);
+ /* unset RLC SRAM CLK GATER override */
+ data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
+ /* update RLC SRAM CLK GATER override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ /* reset FGCG bits */
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ /* disable FGCG */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL);
+ /* reset RLC SRAM CLK GATER bits */
+ data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
+ /* disable RLC SRAM CLK */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data);
+ }
+}
+
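+ /*
+ * MGCG workaround: force the TCPI light-sleep override in every WGP TCP
+ * control register and set SM_MODE to 2 in the per-quad SM control
+ * registers. GC 10.1.2 (Navi12) uses a shorter TCP register list.
+ */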
+static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev)
+{
+ uint32_t reg_data = 0;
+ uint32_t reg_idx = 0;
+ uint32_t i;
+
+ const uint32_t tcp_ctrl_regs[] = {
+ mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG
+ };
+
+ const uint32_t tcp_ctrl_regs_nv12[] = {
+ mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+ mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+ };
+
+ const uint32_t sm_ctlr_regs[] = {
+ mmCGTS_SA0_QUAD0_SM_CTRL_REG,
+ mmCGTS_SA0_QUAD1_SM_CTRL_REG,
+ mmCGTS_SA1_QUAD0_SM_CTRL_REG,
+ mmCGTS_SA1_QUAD1_SM_CTRL_REG
+ };
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) {
+ for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+ tcp_ctrl_regs_nv12[i];
+ reg_data = RREG32(reg_idx);
+ reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+ WREG32(reg_idx, reg_data);
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+ tcp_ctrl_regs[i];
+ reg_data = RREG32(reg_idx);
+ reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+ WREG32(reg_idx, reg_data);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) {
+ reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] +
+ sm_ctlr_regs[i];
+ reg_data = RREG32(reg_idx);
+ reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK;
+ reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT;
+ WREG32(reg_idx, reg_data);
+ }
+}
+
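+ /*
+ * Toggle all GFX clock gating features (FGCG, MGCG/MGLS, 3D CGCG/CGLS,
+ * CGCG/CGLS) in the required order while the RLC is held in safe mode.
+ */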
+static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (enable) {
+ /* enable FGCG first */
+ gfx_v10_0_update_fine_grain_clock_gating(adev, enable);
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
+ /* === CGCG/CGLS for GFX 3D only === */
+ gfx_v10_0_update_3d_clock_gating(adev, enable);
+ /* === CGCG + CGLS === */
+ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+
+ if ((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 10)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 1)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 1, 2)))
+ gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+ /* === CGCG/CGLS for GFX 3D only === */
+ gfx_v10_0_update_3d_clock_gating(adev, enable);
+ /* === MGCG + MGLS === */
+ gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
+ /* disable FGCG last */
+ gfx_v10_0_update_fine_grain_clock_gating(adev, enable);
+ }
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v10_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
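+ /*
+ * Program the VMID used by the RLC streaming performance monitor (SPM)
+ * into RLC_SPM_MC_CNTL, using the no-KIQ accessors for the one-VF SR-IOV
+ * case.
+ */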
+static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
+ unsigned int vmid)
+{
+ u32 reg, pre_data, data;
+
+ reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
+ /* read via the raw offset, not the *_SOC15 helpers */
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
+ else
+ pre_data = RREG32(reg);
+
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+ }
+}
+
+static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
+{
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v10_0_update_spm_vmid_internal(adev, vmid);
+
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
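+ /*
+ * Return true if @offset matches one of the RLCG-protected registers
+ * listed in @entries.
+ */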
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+ uint32_t offset,
+ struct soc15_reg_rlcg *entries, int arr_size)
+{
+ int i;
+ uint32_t reg;
+
+ if (!entries)
+ return false;
+
+ for (i = 0; i < arr_size; i++) {
+ const struct soc15_reg_rlcg *entry;
+
+ entry = &entries[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ if (offset == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+{
+ return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0);
+}
+
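+ /*
+ * Enable/disable GFX power gating (CGPG) and, for the GC 10.3.x variants
+ * handled below, program the CGPG hysteresis in RLC_PG_DELAY_3.
+ */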
+static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, data);
+
+ /*
+ * CGPG enablement requires programming the hysteresis value into
+ * RLC_PG_DELAY_3.CGCG_ACTIVE_BEFORE_CGPG, expressed in refclk counts.
+ * Note that the RLC FW is modified to take 16 bits from
+ * RLC_PG_DELAY_3[15:0] as the hysteresis instead of just 8 bits.
+ *
+ * The recommendation from the RLC team is to set RLC_PG_DELAY_3 to 200us
+ * as the starting point for CGPG enablement.
+ * The power/performance team will optimize it and might give a new value
+ * later.
+ */
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh;
+ WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v10_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v10_0_set_safe_mode,
+ .unset_safe_mode = gfx_v10_0_unset_safe_mode,
+ .init = gfx_v10_0_rlc_init,
+ .get_csb_size = gfx_v10_0_get_csb_size,
+ .get_csb_buffer = gfx_v10_0_get_csb_buffer,
+ .resume = gfx_v10_0_rlc_resume,
+ .stop = gfx_v10_0_rlc_stop,
+ .reset = gfx_v10_0_rlc_reset,
+ .start = gfx_v10_0_rlc_start,
+ .update_spm_vmid = gfx_v10_0_update_spm_vmid,
+};
+
+static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
+ .is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v10_0_set_safe_mode,
+ .unset_safe_mode = gfx_v10_0_unset_safe_mode,
+ .init = gfx_v10_0_rlc_init,
+ .get_csb_size = gfx_v10_0_get_csb_size,
+ .get_csb_buffer = gfx_v10_0_get_csb_buffer,
+ .resume = gfx_v10_0_rlc_resume,
+ .stop = gfx_v10_0_rlc_stop,
+ .reset = gfx_v10_0_rlc_reset,
+ .start = gfx_v10_0_rlc_start,
+ .update_spm_vmid = gfx_v10_0_update_spm_vmid,
+ .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
+};
+
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 7):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v10_cntl_pg(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ gfx_v10_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ /* AMD_CG_SUPPORT_GFX_FGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ /* gfx10 is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
+}
+
+static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ /* gfx10 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx10 now */
+ }
+}
+
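+ /*
+ * Emit an HDP flush request and wait on the NBIO done bit that matches
+ * this ring's ME/pipe.
+ */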
+static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ if (ib->flags & AMDGPU_IB_FLAG_CE)
+ header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
+ if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
+ gfx_v10_0_ring_emit_de_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
+ }
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
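+ /*
+ * Emit a RELEASE_MEM fence: flush the GPU caches and write the 32- or
+ * 64-bit sequence value to @addr, optionally raising an interrupt.
+ */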
+static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
+ PACKET3_RELEASE_MEM_GCR_GLM_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * The address must be Qword aligned for a 64bit write, and Dword
+ * aligned when only the low 32 bits of data are sent (data high is
+ * discarded).
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+ uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ if (ring->adev->gfx.mcbp)
+ gfx_v10_0_ring_emit_ce_meta(ring,
+ (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if a preamble is presented for the first
+ * time, even though no context switch happens.
+ */
+ if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned int ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask;
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
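+ /*
+ * Request mid-command-buffer preemption through the KIQ and poll the
+ * trailing fence until the CP acknowledges the preemption.
+ */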
+static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
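+ /*
+ * Emit the CE metadata used for MCBP; on resume the payload is replayed
+ * from the CSA CPU copy, otherwise a zeroed payload is written.
+ */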
+static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_ce_ib_state ce_payload = {0};
+ uint64_t offset, ce_payload_gpu_addr;
+ void *ce_payload_cpu_addr;
+ int cnt;
+
+ cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
+
+ offset = offsetof(struct v10_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+ sizeof(ce_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+ sizeof(ce_payload) >> 2);
+}
+
+static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_de_ib_state de_payload = {0};
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
+ int cnt;
+
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+
+ cnt = (sizeof(de_payload) >> 2) + 4 - 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
+}
+
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+ bool secure)
+{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+}
+
+static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register */
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ struct amdgpu_device *adev = ring->adev;
+ bool fw_version_ok = false;
+
+ fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+ if (fw_version_ok)
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
+static void
+gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+ break;
+ case 1:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v10_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v10_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ }
+}
+
+static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v10_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t tmp, target;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
+
+ if (ring->me == 1)
+ target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ else
+ target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
+ target += ring->pipe;
+
+ switch (type) {
+ case AMDGPU_CP_KIQ_IRQ_DRIVER0:
+ if (state == AMDGPU_IRQ_STATE_DISABLE) {
+ tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, target, tmp);
+ } else {
+ tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, target, tmp);
+ }
+ break;
+ default:
+ BUG(); /* kiq only supports GENERIC2_INT now */
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+ DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
+ me_id, pipe_id, queue_id);
+
+ amdgpu_fence_process(ring);
+ return 0;
+}
+
+static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization up to 0x3ffe, followed by the remaining NOPs one at a time */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nop - 1 NOP packets */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
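+ /*
+ * Reset a kernel gfx queue after a timeout: request a per-VMID queue reset
+ * via CP_VMID_RESET through the KIQ, wait for the HQD to go idle, then
+ * reinitialize the MQD and remap the queue.
+ */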
+static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ u32 tmp;
+ u64 addr;
+ int r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ addr = amdgpu_bo_gpu_offset(ring->mqd_obj) +
+ offsetof(struct v10_gfx_mqd, cp_gfx_hqd_active);
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (ring->pipe == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << ring->queue);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << ring->queue);
+
+ gfx_v10_0_ring_emit_wreg(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
+ gfx_v10_0_wait_reg_mem(kiq_ring, 0, 1, 0,
+ lower_32_bits(addr), upper_32_bits(addr),
+ 0, 1, 0x20);
+ gfx_v10_0_ring_emit_reg_wait(kiq_ring,
+ SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_kgq_init_queue(ring, true);
+ if (r) {
+ DRM_ERROR("fail to init kgq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
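+ /*
+ * Reset a kernel compute queue after a timeout: unmap it through the KIQ,
+ * wait for the HQD to deactivate, then reinitialize the MQD and remap the
+ * queue.
+ */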
+static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+ int i, r;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
+ 0, 0);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ /* make sure dequeue is complete */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ if (r) {
+ dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+ return r;
+ }
+
+ r = gfx_v10_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+ kiq->pmf->kiq_map_queues(kiq_ring, ring);
+ amdgpu_ring_commit(kiq_ring);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ if (r)
+ return r;
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_10_1[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
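+ /*
+ * Capture the core, per-compute-queue and per-gfx-queue register state
+ * into the ip_dump buffers for later printing by gfx_v10_ip_print().
+ */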
+static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ nv_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_10[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
+static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
+ .name = "gfx_v10_0",
+ .early_init = gfx_v10_0_early_init,
+ .late_init = gfx_v10_0_late_init,
+ .sw_init = gfx_v10_0_sw_init,
+ .sw_fini = gfx_v10_0_sw_fini,
+ .hw_init = gfx_v10_0_hw_init,
+ .hw_fini = gfx_v10_0_hw_fini,
+ .suspend = gfx_v10_0_suspend,
+ .resume = gfx_v10_0_resume,
+ .is_idle = gfx_v10_0_is_idle,
+ .wait_for_idle = gfx_v10_0_wait_for_idle,
+ .soft_reset = gfx_v10_0_soft_reset,
+ .set_clockgating_state = gfx_v10_0_set_clockgating_state,
+ .set_powergating_state = gfx_v10_0_set_powergating_state,
+ .get_clockgating_state = gfx_v10_0_get_clockgating_state,
+ .dump_ip_state = gfx_v10_ip_dump,
+ .print_ip_state = gfx_v10_ip_print,
+};
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v10_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v10_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v10_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 4 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ * the first COND_EXEC jump to the place
+ * just prior to this double SWITCH_BUFFER
+ */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2 + /* SWITCH_BUFFER */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v10_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v10_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = gfx_v10_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v10_0_ring_emit_sb,
+ .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v10_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
+};
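+
+/*
+ * Note on the two size fields above: emit_frame_size is the worst-case
+ * number of ring dwords reserved around the IBs of a single submission
+ * (the per-frame packets itemized in the sum), while emit_ib_size is the
+ * dwords needed per INDIRECT_BUFFER packet, so a submission with N IBs
+ * reserves roughly emit_frame_size + N * emit_ib_size dwords up front.
+ */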
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v10_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v10_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v10_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v10_0_ring_emit_gds_switch */
+ 7 + /* gfx_v10_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v10_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v10_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v10_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = gfx_v10_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v10_0_emit_mem_sync,
+ .reset = gfx_v10_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v10_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v10_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v10_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v10_0_ring_emit_gds_switch */
+ 7 + /* gfx_v10_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v10_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v10_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v10_0_ring_emit_rreg,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
+};
+
+static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v10_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = {
+ .set = gfx_v10_0_set_eop_interrupt_state,
+ .process = gfx_v10_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
+ .set = gfx_v10_0_set_priv_reg_fault_state,
+ .process = gfx_v10_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_bad_op_irq_funcs = {
+ .set = gfx_v10_0_set_bad_op_fault_state,
+ .process = gfx_v10_0_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
+ .set = gfx_v10_0_set_priv_inst_fault_state,
+ .process = gfx_v10_0_priv_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = {
+ .set = gfx_v10_0_kiq_set_interrupt_state,
+ .process = gfx_v10_0_kiq_irq,
+};
+
+static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;
+
+ adev->gfx.kiq[0].irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
+ adev->gfx.kiq[0].irq.funcs = &gfx_v10_0_kiq_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
+
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v10_0_bad_op_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 3):
+ case IP_VERSION(10, 1, 4):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ case IP_VERSION(10, 3, 6):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
+ break;
+ case IP_VERSION(10, 1, 2):
+ case IP_VERSION(10, 3, 0):
+ adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs_sriov;
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
+{
+ unsigned int total_cu = adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines;
+
+ adev->gds.gds_size = 0x10000;
+ adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
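+
+/*
+ * Example: on a hypothetical part with max_cu_per_sh = 10, max_sh_per_se = 2
+ * and max_shader_engines = 2, total_cu = 40, so gds_compute_max_wave_id is
+ * 40 * 32 - 1 = 1279, i.e. 32 waves per CU.
+ */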
+
+static void gfx_v10_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v10_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v10_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v10_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v10_0_compute_mqd_init;
+}
+
+static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 disabled_mask =
+ ~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+ u32 efuse_setting = 0;
+ u32 vbios_setting = 0;
+
+ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+ efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+ vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ disabled_mask |= efuse_setting | vbios_setting;
+
+ return (~disabled_mask);
+}
+
+static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
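+
+/*
+ * Example of the expansion above: a wgp_active_bitmap of 0b1011 (WGPs 0, 1
+ * and 3 active) yields a cu_active_bitmap of 0b11001111, since each active
+ * WGP contributes the two CU bits at positions 2*wgp and 2*wgp + 1.
+ */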
+
+static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ unsigned int disable_masks[4 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (((amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 0)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 3)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 6)) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(10, 3, 7))) &&
+ ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 4 && j < 2)
+ gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
+ cu_info->bitmap[0][i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ }
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev)
+{
+ uint32_t efuse_setting, vbios_setting, disabled_sa, max_sa_mask;
+
+ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE);
+ efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+ efuse_setting >>= CC_GC_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+ vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SA_UNIT_DISABLE);
+ vbios_setting &= GC_USER_SA_UNIT_DISABLE__SA_DISABLE_MASK;
+ vbios_setting >>= GC_USER_SA_UNIT_DISABLE__SA_DISABLE__SHIFT;
+
+ max_sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+ disabled_sa = efuse_setting | vbios_setting;
+ disabled_sa &= max_sa_mask;
+
+ return disabled_sa;
+}
+
+static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev)
+{
+ uint32_t max_sa_per_se, max_sa_per_se_mask, max_shader_engines;
+ uint32_t disabled_sa_mask, se_index, disabled_sa_per_se;
+
+ disabled_sa_mask = gfx_v10_3_get_disabled_sa(adev);
+
+ max_sa_per_se = adev->gfx.config.max_sh_per_se;
+ max_sa_per_se_mask = (1 << max_sa_per_se) - 1;
+ max_shader_engines = adev->gfx.config.max_shader_engines;
+
+ for (se_index = 0; max_shader_engines > se_index; se_index++) {
+ disabled_sa_per_se = disabled_sa_mask >> (se_index * max_sa_per_se);
+ disabled_sa_per_se &= max_sa_per_se_mask;
+ if (disabled_sa_per_se == max_sa_per_se_mask) {
+ WREG32_FIELD15(GC, 0, PA_SC_ENHANCE_3, FORCE_PBB_WORKLOAD_MODE_TO_ZERO, 1);
+ break;
+ }
+ }
+}
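+
+/*
+ * Example: with max_sh_per_se = 2 and max_shader_engines = 2, a
+ * disabled_sa_mask of 0b1100 means both SAs of SE1 are disabled
+ * (0b1100 >> 2 == 0b11 == max_sa_per_se_mask), so the loop above forces
+ * FORCE_PBB_WORKLOAD_MODE_TO_ZERO.
+ */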
+
+static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (0x1 << GRBM_GFX_INDEX__SA_BROADCAST_WRITES__SHIFT) |
+ (0x1 << GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT) |
+ (0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+
+ WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL);
+ WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA,
+ (0x1 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_STEP_INTERVAL__SHIFT) |
+ (0x12 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_BEGIN_STEP__SHIFT) |
+ (0x13 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_END_STEP__SHIFT) |
+ (0xf << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_THROTTLE_PATTERN_BIT_NUMS__SHIFT));
+
+ WREG32_SOC15(GC, 0, mmGC_THROTTLE_CTRL_Sienna_Cichlid,
+ (0x1 << GC_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT) |
+ (0x1 << GC_THROTTLE_CTRL__PATTERN_MODE__SHIFT) |
+ (0x5 << GC_THROTTLE_CTRL__RELEASE_STEP_INTERVAL__SHIFT));
+
+ WREG32_SOC15(GC, 0, mmDIDT_IND_INDEX, ixDIDT_SQ_THROTTLE_CTRL);
+
+ WREG32_SOC15(GC, 0, mmDIDT_IND_DATA,
+ (0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT));
+}
+
+const struct amdgpu_ip_block_version gfx_v10_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v10_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h
new file mode 100644
index 000000000000..b442e50324d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V10_0_H__
+#define __GFX_V10_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v10_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
new file mode 100644
index 000000000000..f67569ccf9f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_10_1_10 */
+static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbf068100, 0xbf840023,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020102,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803000,
+ 0xbe813000, 0xbe823000,
+ 0xbe833000, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+/* Define the cleaner shader gfx_10_3_0 */
+static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020002,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803080,
+ 0xbe813080, 0xbe823080,
+ 0xbe833080, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
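+
+/*
+ * The two arrays above appear to be the assembled dwords of the
+ * gfx_v10_1_10_cleaner_shader.asm and gfx_v10_3_0_cleaner_shader.asm
+ * listings added below, presumably so the RUN_CLEANER_SHADER packet emitted
+ * by the ring code has something to execute.
+ */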
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
new file mode 100644
index 000000000000..54f7ed9e2801
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 Dwords (256 bytes) of the 256-Dword cleaner shader.
+
+// GFX10.1 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10.1)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_cmp_eq_u32 s0, 1 // FW sets COMPUTE_USER_DATA_0, so sgpr0 == 1 means also clear VGPRs and LDS
+ s_cbranch_scc0 label_0023 // otherwise skip straight to the SGPR clear, scc = (s0 == 1)
+
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr1 has the tg_size (first_wave) term, as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS only if it's the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x0000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, s0
+ s_movreld_b32 s1, s0
+ s_movreld_b32 s2, s0
+ s_movreld_b32 s3, s0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b64 vcc, 0 //clear vcc
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
new file mode 100644
index 000000000000..0e1c246166c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 Dwords (256 bytes) of the 192-Dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1.
+
+// GFX10.3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has the tg_size (first_wave) term, as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS only if it's the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x0000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
new file mode 100644
index 000000000000..8a2ee2de390f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -0,0 +1,7538 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "imu_v11_0.h"
+#include "soc21.h"
+#include "nvd.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "smuio/smuio_13_0_6_offset.h"
+#include "smuio/smuio_13_0_6_sh_mask.h"
+#include "navi10_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+
+#include "soc15.h"
+#include "clearstate_gfx11.h"
+#include "v11_structs.h"
+#include "gfx_v11_0.h"
+#include "gfx_v11_0_cleaner_shader.h"
+#include "gfx_v11_0_3.h"
+#include "nbio_v4_3.h"
+#include "mes_v11_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define GFX11_NUM_GFX_RINGS 1
+#define GFX11_MEC_HPD_SIZE 2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388
+
+#define regCGTT_WD_CLK_CTRL 0x5086
+#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
+#define regPC_CONFIG_CNTL_1 0x194d
+#define regPC_CONFIG_CNTL_1_BASE_IDX 1
+
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
+};
+
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+
+static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t me = 0, eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ me = 1;
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ me = 0;
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ me = 2;
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((me)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx11_kiq_set_resources,
+ .kiq_map_queues = gfx11_kiq_map_queues,
+ .kiq_unmap_queues = gfx11_kiq_unmap_queues,
+ .kiq_query_status = gfx11_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
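+
+/*
+ * The *_size fields above are the number of dwords each helper writes to the
+ * KIQ ring (e.g. gfx11_kiq_set_resources emits 8 dwords), so the common KIQ
+ * code can reserve the right amount of ring space before calling them.
+ */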
+
+static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
+}
+
+static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_11_0_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
+ break;
+ default:
+ break;
+ }
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_11_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_11_0));
+
+}
+
+static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization up to a count of 0x3ffe, followed by the remaining NOPs one at a time */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nop - 1 NOP dwords */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
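+
+/*
+ * Example: num_nop = 8 emits PACKET3(NOP, 6) followed by seven filler
+ * dwords, which the CP consumes as one 8-dword NOP packet; only when
+ * num_nop exceeds the 0x3ffe count limit do the trailing fillers get
+ * decoded as individual one-dword NOPs.
+ */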
+
+static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+ gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+ } else {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ }
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
+static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t *cpu_ptr;
+ long r;
+
+ /* MES KIQ fw doesn't have indirect buffer support for now */
+ if (adev->enable_mes_kiq &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ return 0;
+
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+err2:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+ const struct psp_firmware_header_v1_0 *toc_hdr;
+ int err = 0;
+
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", ucode_prefix);
+ if (err)
+ goto out;
+
+ toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+ adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+ adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+ adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+ le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.toc_fw);
+ return err;
+}
+
+static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 1505) &&
+ (adev->gfx.pfp_fw_version >= 1600) &&
+ (adev->gfx.mec_fw_version >= 512)) {
+ if (amdgpu_sriov_vf(adev))
+ adev->gfx.cp_gfx_shadow = true;
+ else
+ adev->gfx.cp_gfx_shadow = false;
+ }
+ break;
+ default:
+ adev->gfx.cp_gfx_shadow = false;
+ break;
+ }
+}
+
+static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
+{
+ char ucode_prefix[25];
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", ucode_prefix);
+ if (err)
+ goto out;
+ /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
+ adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
+ (union amdgpu_firmware_header *)
+ adev->gfx.pfp_fw->data, 2, 0);
+ if (adev->gfx.rs64_enable) {
+ dev_info(adev->dev, "CP RS64 enable\n");
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", ucode_prefix);
+ if (err)
+ goto out;
+ if (adev->gfx.rs64_enable) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
+ adev->pdev->revision == 0xCE)
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/gc_11_0_0_rlc_1.bin");
+ else if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", ucode_prefix);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
+ goto out;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", ucode_prefix);
+ if (err)
+ goto out;
+ if (adev->gfx.rs64_enable) {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
+ } else {
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
+
+ /* only one MEC for gfx 11.0.0. */
+ adev->gfx.mec2_fw = NULL;
+
+ gfx_v11_0_check_fw_cp_gfx_shadow(adev);
+
+ if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
+ err = adev->gfx.imu.funcs->init_microcode(adev);
+ if (err)
+ DRM_ERROR("Failed to init imu firmware!\n");
+ return err;
+ }
+
+out:
+ if (err) {
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ }
+
+ return err;
+}
+
+static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+
+ /* set PA_SC_TILE_STEERING_OVERRIDE */
+ count += 3;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+{
+ u32 count = 0;
+ int ctx_reg_offset;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
+
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ buffer[count++] = cpu_to_le32(ctx_reg_offset);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_gfx_csb_preamble_end(buffer, count);
+}
+
+static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
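+ /* record the scratch/GRBM register offsets used for RLC-assisted (RLCG) register access */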
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx11_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+ return 0;
+}
+
+static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+static void gfx_v11_0_me_init(struct amdgpu_device *adev)
+{
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ size_t mec_hpd_size;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
+
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ return 0;
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
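+ /* indirect SQ register read: select the wave and index via SQ_IND_INDEX, then read SQ_IND_DATA */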
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* in gfx11 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here */
+ WARN_ON(simd != 0);
+
+ /* type 3 wave data */
+ dst[(*no_fields)++] = 3;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ WARN_ON(simd != 0);
+
+ wave_read_regs(
+ adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+ dst);
+}
+
+static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc21_grbm_select(adev, me, pipe, q, vm);
+}
+
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+ return 0;
+ } else {
+ memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+ return -ENOTSUPP;
+ }
+}
+
+static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v11_0_select_se_sh,
+ .read_wave_data = &gfx_v11_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
+ .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
+};
+
+static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ break;
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.ras = &gfx_v11_0_3_ras;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+ int me, int pipe, int queue)
+{
+ struct amdgpu_ring *ring;
+ unsigned int irq_type;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.gfx_ring[ring_id];
+
+ ring->me = me;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ if (adev->gfx.disable_kq) {
+ ring->no_scheduler = true;
+ ring->no_user_submission = true;
+ }
+
+ if (!ring_id)
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+}
+
+static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX11_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static struct {
+ SOC21_FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
+
+static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+ RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
+
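+ /* TOC entries store offset and size in dwords; convert to bytes */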
+ while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
+ (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
+ rlc_autoload_info[ucode->id].id = ucode->id;
+ rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
+ rlc_autoload_info[ucode->id].size = ucode->size * 4;
+
+ ucode++;
+ }
+}
+
+static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+ uint32_t total_size = 0;
+ SOC21_FIRMWARE_ID id;
+
+ gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+ for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
+ total_size += rlc_autoload_info[id].size;
+
+ /* The offsets in the rlc toc may be aligned, so make sure the total covers the last entry */
+ if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].offset)
+ total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].offset +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].size;
+
+ return total_size;
+}
+
+static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t total_size;
+
+ total_size = gfx_v11_0_calc_toc_total_size(adev);
+
+ r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ SOC21_FIRMWARE_ID id,
+ const void *fw_data,
+ uint32_t fw_size,
+ uint32_t *fw_autoload_mask)
+{
+ uint32_t toc_offset;
+ uint32_t toc_fw_size;
+ char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+
+ if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
+ return;
+
+ toc_offset = rlc_autoload_info[id].offset;
+ toc_fw_size = rlc_autoload_info[id].size;
+
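+ /* clamp the copy to the TOC slot size and zero-pad any remainder */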
+ if (fw_size == 0)
+ fw_size = toc_fw_size;
+
+ if (fw_size > toc_fw_size)
+ fw_size = toc_fw_size;
+
+ memcpy(ptr + toc_offset, fw_data, fw_size);
+
+ if (fw_size < toc_fw_size)
+ memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
+
+ if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
+ *(uint64_t *)fw_autoload_mask |= 1ULL << id;
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ void *data;
+ uint32_t size;
+ uint64_t *toc_ptr;
+
+ *(uint64_t *)fw_autoload_mask |= 0x1;
+
+ DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
+
+ data = adev->psp.toc.start_addr;
+ size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
+
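+ /* the last qword of the TOC image carries the autoload firmware mask */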
+ toc_ptr = (uint64_t *)data + size / 8 - 1;
+ *toc_ptr = *(uint64_t *)fw_autoload_mask;
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
+ data, size, fw_autoload_mask);
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
+ uint16_t version_major, version_minor;
+
+ if (adev->gfx.rs64_enable) {
+ /* pfp ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ /* me ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ /* mec ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
+ fw_data, fw_size, fw_autoload_mask);
+ /* data */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
+ fw_data, fw_size, fw_autoload_mask);
+ } else {
+ /* pfp ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
+ fw_data, fw_size, fw_autoload_mask);
+
+ /* me ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
+ fw_data, fw_size, fw_autoload_mask);
+
+ /* mec ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
+ fw_data, fw_size, fw_autoload_mask);
+ }
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size, fw_autoload_mask);
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2) {
+ if (version_minor >= 2) {
+ rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
+ fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
+ fw_data, fw_size, fw_autoload_mask);
+ }
+ }
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v2_0 *sdma_hdr;
+
+ sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
+ adev->sdma.instance[0].fw->data;
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->ctl_ucode_offset));
+ fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
+}
+
+static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
+ uint32_t *fw_autoload_mask)
+{
+ const __le32 *fw_data;
+ unsigned fw_size;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ int pipe, ucode_id, data_id;
+
+ for (pipe = 0; pipe < 2; pipe++) {
+ if (pipe == 0) {
+ ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
+ data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
+ } else {
+ ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
+ data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
+ }
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ ucode_id, fw_data, fw_size, fw_autoload_mask);
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
+ data_id, fw_data, fw_size, fw_autoload_mask);
+ }
+}
+
+static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size;
+ uint64_t gpu_addr;
+ uint32_t autoload_fw_id[2];
+
+ memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
+
+ /* RLC autoload sequence 2: copy ucode */
+ gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
+ gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
+
+ rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
+
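+ /* point the RLC bootloader at the RLC_G ucode inside the autoload buffer */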
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
+
+ /* RLC autoload sequence 3: load IMU fw */
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ /* RLC autoload sequence 4 init IMU fw */
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+
+ /* RLC autoload sequence 5 disable gpa mode */
+ gfx_v11_0_disable_gpa_mode(adev);
+
+ return 0;
+}
+
+static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
+}
+
+static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 2;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
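+ /* MES-backed user queues require new enough ME/PFP/MEC and MES firmware */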
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2420 &&
+ adev->gfx.pfp_fw_version >= 2580 &&
+ adev->gfx.mec_fw_version >= 2650 &&
+ adev->mes.fw_version[0] >= 120) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* add firmware version checks here */
+ if (0 && !adev->gfx.disable_uq) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
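+ /* enable the cleaner shader only where supported firmware is available */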
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 2280 &&
+ adev->gfx.pfp_fw_version >= 2370 &&
+ adev->gfx.mec_fw_version >= 2450 &&
+ adev->mes.fw_version[0] >= 99) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.pfp_fw_version >= 102 &&
+ adev->gfx.mec_fw_version >= 66 &&
+ adev->mes.fw_version[0] >= 128) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 26 &&
+ adev->mes.fw_version[0] >= 114) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 12 &&
+ adev->gfx.pfp_fw_version >= 15 &&
+ adev->gfx.mec_fw_version >= 15) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 7 &&
+ adev->gfx.pfp_fw_version >= 8 &&
+ adev->gfx.mec_fw_version >= 8) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ /* Enable the CG flag in one-VF mode so RLC safe mode enter/exit can be used */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
+ amdgpu_sriov_is_pp_one_vf(adev))
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ /* FED error */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
+ &adev->gfx.rlc_gc_fed_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v11_0_me_init(adev);
+
+ r = gfx_v11_0_rlc_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v11_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ if (adev->gfx.num_gfx_rings) {
+ ring_id = 0;
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v11_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
+
+ r = gfx_v11_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 2280) &&
+ (adev->gfx.mec_fw_version >= 2410) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ if (!amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ }
+
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v11_0_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ if (amdgpu_gfx_ras_sw_init(adev)) {
+ dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
+ return -EINVAL;
+ }
+
+ gfx_v11_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+}
+
+static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+}
+
+static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
+ gfx_v11_0_pfp_fini(adev);
+ gfx_v11_0_me_fini(adev);
+ gfx_v11_0_rlc_fini(adev);
+ gfx_v11_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v11_0_rlc_autoload_buffer_fini(adev);
+
+ gfx_v11_0_free_microcode(adev);
+
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
+ return 0;
+}
+
+static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
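+ /* 0xffffffff selects broadcast writes rather than a specific instance/SE/SA */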
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+ gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
+ gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+ CC_GC_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
+ gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+ GC_USER_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
+}
+
+static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+ u32 rb_mask;
+
+ gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+ gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+ CC_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+ GC_USER_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
+}
+
+static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
+{
+ u32 rb_bitmap_per_sa;
+ u32 rb_bitmap_width_per_sa;
+ u32 max_sa;
+ u32 active_sa_bitmap;
+ u32 global_active_rb_bitmap;
+ u32 active_rb_bitmap = 0;
+ u32 i;
+
+ /* query sa bitmap from SA_UNIT_DISABLE registers */
+ active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
+ /* query rb bitmap from RB_BACKEND_DISABLE registers */
+ global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
+
+ /* generate active rb bitmap according to active sa bitmap */
+ max_sa = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
+ for (i = 0; i < max_sa; i++) {
+ if (active_sa_bitmap & (1 << i))
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
+ }
+
+ active_rb_bitmap &= global_active_rb_bitmap;
+ adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+ adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+#define LDS_APP_BASE 0x1
+#define SCRATCH_APP_BASE 0x2
+
+static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ SCRATCH_APP_BASE;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc21_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
+ }
+}
+
+static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < 16; vmid++) {
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
+ WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
+ }
+}
+
+static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ /* TODO: harvest feature to be added later. */
+}
+
+static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
+{
+ /* TCCs are global (not instanced). */
+ uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
+ RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
+
+ adev->gfx.config.tcc_disabled_mask =
+ REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
+ (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
+}
+
+static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v11_0_setup_rb(adev);
+ gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v11_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+ /* Set whether texture coordinate truncation is conformant. */
+ tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
+ adev->gfx.config.ta_cntl2_truncate_coord_mode =
+ REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc21_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v11_0_init_compute_vmid(adev);
+ gfx_v11_0_init_gds_vmid(adev);
+}
+
+static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ case 2:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+ case 3:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
+static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
+}
+
+static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+
+ return 0;
+}
+
+static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
+}
+
+static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t rlc_pg_cntl;
+
+ rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (!enable) {
+ /* RLC_PG_CNTL[23] = 0 (default)
+ * RLC will wait for handshake acks with SMU
+ * GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ * RLC will not issue any message to SMU
+ * hence no handshake between SMU & RLC
+ * GFXOFF will be disabled
+ */
+ rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ } else
+ rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
+}
+
+static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
+{
+ /* TODO: enable the rlc & smu handshake once smu
+ * and the gfxoff feature work as expected */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
+}
+
+static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+}
+
+static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
+ WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
+}
+
+static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
+ tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
+}
+
+static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ version_major = le16_to_cpu(hdr->header.header_version_major);
+ version_minor = le16_to_cpu(hdr->header.header_version_minor);
+
+ if (version_major == 2) {
+ gfx_v11_0_load_rlcg_microcode(adev);
+ if (amdgpu_dpm == 1) {
+ if (version_minor >= 2)
+ gfx_v11_0_load_rlc_iram_dram_microcode(adev);
+ if (version_minor == 3)
+ gfx_v11_0_load_rlcp_rlcv_microcode(adev);
+ }
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ gfx_v11_0_init_csb(adev);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v11_0_rlc_enable_srm(adev);
+ } else {
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v11_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v11_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_init_csb(adev);
+
+ adev->gfx.rlc.funcs->start(adev);
+ }
+ return 0;
+}
+
+static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /* Program me ucode address into instruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /* Program pfp ucode address into instruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ /* Program mec1 ucode address into instruction cache address register */
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
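+/*
+ * The RS64 variants additionally program the per-pipe data cache base
+ * (addr2) and the ucode start address taken from the v2 firmware header,
+ * pulsing the pipe reset bits so the new program counter is picked up.
+ */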
+static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i, pipe_id;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the PFP L1 I$. Wait for the
+ * invalidation to complete.
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Wait for the cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset the given pipe so that
+ * CP_PFP_PRGRM_CNTR_START takes effect.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear the pfp pipe reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(addr2));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(addr2));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i, pipe_id;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation to complete.
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for the instruction cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset the given pipe so that
+ * CP_ME_PRGRM_CNTR_START takes effect.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear the me pipe reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(addr2));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(addr2));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ unsigned i;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc21_grbm_select(adev, 1, i, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+ upper_32_bits(addr2));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* Trigger an invalidation of the MEC L1 data cache */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
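+/*
+ * Program the RS64 ucode start address for every PFP/ME/MEC pipe and pulse
+ * the corresponding pipe reset bits so the new program counters take effect.
+ */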
+static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ uint32_t pipe_id, tmp;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ /* config pfp program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset pfp pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear pfp pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config me program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset me pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear me pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config mec program start addr */
+ for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+ soc21_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset mec pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+
+ /* clear mec pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+}
+
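+/*
+ * Poll CP_STAT and the RLC bootload status until the RLC reports that the
+ * GC firmware autoload has completed, then (for backdoor autoload) point the
+ * CP ME/PFP/MEC caches at the firmware images placed in the RLC autoload
+ * buffer.
+ */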
+static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+ uint32_t cp_status;
+ uint32_t bootload_status;
+ int i, r;
+ uint64_t addr, addr2;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(11, 0, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(11, 0, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
+ bootload_status = RREG32_SOC15(GC, 0,
+ regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
+ else
+ bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+ if ((cp_status == 0) &&
+ (REG_GET_FIELD(bootload_status,
+ RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.rs64_enable) {
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
+ r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
+ r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
+ addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
+ r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
+ if (r)
+ return r;
+ } else {
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
+ r = gfx_v11_0_config_me_cache(adev, addr);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
+ r = gfx_v11_0_config_pfp_cache(adev, addr);
+ if (r)
+ return r;
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
+ r = gfx_v11_0_config_mec_cache(adev, addr);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
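+/* Halt or un-halt the gfx ME/PFP and wait for CP_STAT to read back zero. */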
+static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
+}
+
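+/*
+ * Legacy (non-RS64) PFP load: copy the ucode into a GTT buffer, point the
+ * PFP instruction cache at it and write the jump table through the
+ * CP_HYP_PFP_UCODE_ADDR/DATA registers.
+ */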
+static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+
+ gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
+
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
+
+ for (i = 0; i < pfp_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
+ le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
+
+ return 0;
+}
+
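+/*
+ * RS64 PFP load: the v2 firmware image carries separate instruction and
+ * data sections; each is copied into its own 64KB-aligned buffer and wired
+ * to the PFP instruction cache and the per-pipe data cache respectively.
+ */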
+static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the PFP L1 I$. Wait for the
+ * invalidation to complete.
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Wait for the cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset the given pipe so that
+ * CP_PFP_PRGRM_CNTR_START takes effect.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear the pfp pipe reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
+ gfx_v11_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+
+ gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
+
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
+
+ for (i = 0; i < me_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
+ le32_to_cpup(fw_data + me_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
+
+ return 0;
+}
+
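+/* RS64 ME load: same scheme as the PFP above, using CP_ME_IC_BASE and DC_BASE1. */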
+static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
+ gfx_v11_0_me_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
+ gfx_v11_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation to complete.
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for the instruction cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc21_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset the given pipe so that
+ * CP_ME_PRGRM_CNTR_START takes effect.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear the me pipe reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
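+/* Halt the gfx CP, then load PFP and ME via the RS64 or legacy path. */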
+static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_gfx_enable(adev, false);
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
+ return r;
+ }
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load me fw\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
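+/*
+ * Init the CP and emit the clear-state preamble on gfx ring 0: the
+ * SECT_CONTEXT extents from gfx11_cs_data plus the PA_SC_TILE_STEERING
+ * override, followed by CLEAR_STATE (and a CLEAR_STATE on ring 1 to copy
+ * state 0 when a second gfx ring is present).
+ */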
+static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+ int ctx_reg_offset;
+
+ /* init the CP */
+ WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
+ adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v11_0_cp_gfx_enable(adev, true);
+
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring, ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ amdgpu_ring_write(ring, ctx_reg_offset);
+ amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+
+ /* submit cs packet to copy state 0 to next available state */
+ if (adev->gfx.num_gfx_rings > 1) {
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+ }
+ return 0;
+}
+
+static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+ CP_PIPE_ID pipe)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+}
+
+static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
+
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
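+/*
+ * Program the ring buffer registers (size, rptr/wptr addresses, base and
+ * doorbell) for gfx ring 0 on pipe 0 and, when present, gfx ring 1 on
+ * pipe 1, then kick off gfx_v11_0_cp_gfx_start().
+ */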
+static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
+
+ gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Init gfx ring 1 for pipe 1 */
+ if (adev->gfx.num_gfx_rings > 1) {
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
+
+ gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+ }
+ /* Switch to pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v11_0_cp_gfx_start(adev);
+
+ return 0;
+}
+
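+/*
+ * Halt or release the compute micro engines: the RS64 path drives
+ * CP_MEC_RS64_CNTL, the legacy path CP_MEC_CNTL (ME2 is only un-halted
+ * when the MES KIQ is not in use).
+ */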
+static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 data;
+
+ if (adev->gfx.rs64_enable) {
+ data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
+ enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
+ } else {
+ data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
+
+ if (enable) {
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
+ if (!adev->enable_mes_kiq)
+ data = REG_SET_FIELD(data, CP_MEC_CNTL,
+ MEC_ME2_HALT, 0);
+ } else {
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
+ data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
+ }
+
+ udelay(50);
+}
+
+static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 *fw = NULL;
+ int r;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+ gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
+
+ /* MEC1 */
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
+
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
+
+ return 0;
+}
+
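+/*
+ * RS64 MEC load: copy the instruction and data sections into 64KB-aligned
+ * buffers, program the per-pipe MEC data/instruction cache bases and ucode
+ * start address, then invalidate both caches.
+ */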
+static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ u32 tmp, fw_ucode_size, fw_data_size;
+ u32 i, usec_timeout = 50000; /* Wait for 50 ms */
+ u32 *fw_ucode_ptr, *fw_data_ptr;
+ int r;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v11_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw_ucode_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_data_obj,
+ &adev->gfx.mec.mec_fw_data_gpu_addr,
+ (void **)&fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v11_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+ memcpy(fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc21_grbm_select(adev, 1, i, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
+
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ }
+ mutex_unlock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+
+ /* Trigger an invalidation of the MEC L1 data cache */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell the RLC which queue is the KIQ */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
+}
+
+static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
+{
+ /* set graphics engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.gfx_ring0 * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
+
+ /* set compute engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+}
+
+static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v11_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ bool priority = false;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = 1;
+
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
+
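+/*
+ * Fill a v11 gfx MQD from the generic amdgpu_mqd_prop: the HQD mirrors of
+ * the CP_RB0_* ring registers plus doorbell control and, at the end, the
+ * gfx user-queue shadow/CSA/fence addresses.
+ */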
+static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v11_gfx_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0;
+
+ /* set up gfx queue priority */
+ gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
+
+ /* set up time quantum */
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up the gfx hqd base, similar to CP_RB_BASE */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+ if (!prop->kernel_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
+
+ /* activate the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
+ mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
+ if (r)
+ return r;
+
+ return gfx_v11_0_cp_gfx_start(adev);
+}
+
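+/*
+ * Fill a v11 compute MQD: EOP buffer, doorbell control, HQD ring registers
+ * and static pipe/queue priorities, mirroring what
+ * gfx_v11_0_kiq_init_register() later writes to the hardware.
+ */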
+static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v11_compute_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = prop->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, similar to CP_RB0_BASE/_HI */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(prop->queue_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
+ prop->allow_tunneling);
+ if (prop->kernel_queue) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ }
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (prop->use_doorbell) {
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a compute queue/ring */
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+ mqd->cp_hqd_active = prop->hqd_active;
+
+ /* set the UQ fence address */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
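+/*
+ * Program the KIQ MQD contents into the HQD registers of the currently
+ * selected me/pipe/queue.  The callers hold srbm_mutex and select the
+ * queue with soc21_grbm_select() before calling this.
+ */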
+static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
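+/*
+ * Initialize the KIQ ring MQD.  During a GPU reset the MQD is restored
+ * from the backup copy; otherwise a fresh MQD is generated and backed up.
+ */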
+static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+
+ gfx_v11_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v11_0_kiq_init_register(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ gfx_v11_0_kiq_init_register(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.kiq[0].mqd_backup)
+ memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
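+/*
+ * Initialize a KCQ (compute ring) MQD.  On first init a fresh MQD is
+ * generated and backed up; on reset or resume it is restored from the
+ * backup and the ring buffer is cleared.
+ */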
+static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v11_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
+{
+ gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
+ return 0;
+}
+
+static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v11_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_gfx_enable_kcq(adev, 0);
+}
+
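+/*
+ * Bring up the CP: load microcode for the direct loading path, set the
+ * doorbell range, resume the KIQ (or MES KIQ), the compute rings and the
+ * gfx rings, and finally run ring tests on all of them.
+ */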
+static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v11_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v11_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ if (adev->gfx.rs64_enable)
+ r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
+ else
+ r = gfx_v11_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_cp_set_doorbell_range(adev);
+
+ if (amdgpu_async_gfx_ring) {
+ gfx_v11_0_cp_compute_enable(adev, true);
+ gfx_v11_0_cp_gfx_enable(adev, true);
+ }
+
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+ r = amdgpu_mes_kiq_hw_init(adev);
+ else
+ r = gfx_v11_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v11_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v11_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ if (adev->gfx.disable_kq) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ /* we don't want to set ring->ready */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+ }
+ if (amdgpu_async_gfx_ring)
+ amdgpu_gfx_disable_kgq(adev, 0);
+ } else {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v11_0_cp_gfx_enable(adev, enable);
+ gfx_v11_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ /* TODO investigate why this and the hdp flush above is needed,
+ * are we missing a flush somewhere else? */
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+ return 0;
+}
+
+static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ /* select RS64 */
+ if (adev->gfx.rs64_enable) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
+ }
+
+ if (amdgpu_emu_mode == 1)
+ msleep(100);
+}
+
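+/*
+ * Read GB_ADDR_CONFIG from the hardware and decode the pipe, packer,
+ * shader engine and RB layout fields into adev->gfx.config.  Returns
+ * -EINVAL if the register reads back as zero.
+ */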
+static int get_gb_addr_config(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+ if (gb_addr_config == 0)
+ return -EINVAL;
+
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
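+/*
+ * Disable GPA mode by setting the GPA override bits in the CPC and CPG
+ * PSP debug registers.
+ */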
+static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
+
+ data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
+ data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
+}
+
+static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.imu.funcs) {
+ /* RLC autoload sequence 1: Program rlc ram */
+ if (adev->gfx.imu.funcs->program_rlc_ram)
+ adev->gfx.imu.funcs->program_rlc_ram(adev);
+ /* rlc autoload firmware */
+ r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ }
+ } else {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+ }
+
+ /* disable gpa mode in backdoor loading */
+ gfx_v11_0_disable_gpa_mode(adev);
+ }
+ }
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+ return r;
+ }
+ }
+
+ adev->gfx.is_poweron = true;
+
+ if (get_gb_addr_config(adev))
+ DRM_WARN("Invalid gb_addr_config !\n");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ adev->gfx.rs64_enable)
+ gfx_v11_0_config_gfx_rs64(adev);
+
+ r = gfx_v11_0_gfxhub_enable(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_emu_mode)
+ gfx_v11_0_init_golden_registers(adev);
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
+ /*
+ * For gfx 11, RLC firmware loading relies on the SMU firmware being
+ * loaded first, so for direct loading the SMC ucode has to be loaded
+ * here before the RLC.
+ */
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ }
+
+ gfx_v11_0_constants_init(adev);
+
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ gfx_v11_0_select_cp_fw_arch(adev);
+
+ if (adev->nbio.funcs->gc_doorbell_init)
+ adev->nbio.funcs->gc_doorbell_init(adev);
+
+ r = gfx_v11_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * Golden register init and RLC resume may override some registers,
+ * so reconfigure them here.
+ */
+ gfx_v11_0_tcp_harvest(adev);
+
+ r = gfx_v11_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ /* get IMU version from HW if it's not set */
+ if (!adev->gfx.imu_fw_version)
+ adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
+
+ return r;
+}
+
+static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v11_0_set_userq_eop_interrupts(adev, false);
+
+ if (!adev->no_hw_access) {
+ if (amdgpu_async_gfx_ring &&
+ !adev->gfx.disable_kq) {
+ if (amdgpu_gfx_disable_kgq(adev, 0))
+ DRM_ERROR("KGQ disable failed\n");
+ }
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
+ DRM_ERROR("KCQ disable failed\n");
+
+ amdgpu_mes_kiq_hw_fini(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ /* Skip the steps that disable CPG and clear the KIQ position so
+ * that the CP can perform IDLE-SAVE during the switch. Those steps
+ * are needed to avoid a DMAR error on gfx9, but that error has not
+ * been reproduced on gfx11.
+ */
+ return 0;
+
+ gfx_v11_0_cp_enable(adev, false);
+ gfx_v11_0_enable_gui_idle_interrupt(adev, false);
+
+ adev->gfxhub.funcs->gart_disable(adev);
+
+ adev->gfx.is_poweron = false;
+
+ return 0;
+}
+
+static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v11_0_hw_fini(ip_block);
+}
+
+static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v11_0_hw_init(ip_block);
+}
+
+static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
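+/*
+ * Acquire (req == true) or release (req == false) the CP_GFX_INDEX_MUTEX
+ * hardware mutex, retrying until the request takes effect or the timeout
+ * expires.
+ */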
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
+{
+ u32 i, tmp, val;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Request with MeId=2, PipeId=0 */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ if (val == tmp)
+ break;
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
+
+ /* unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
+
+ return 0;
+}
+
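+/*
+ * Soft reset the GFX block: mask CP interrupts, request dequeue on every
+ * compute and gfx queue, reset the VMIDs via CP_VMID_RESET, pulse the CP
+ * bits in GRBM_SOFT_RESET and finally resume the CP.
+ */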
+static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ int r, i, j, k;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ soc21_grbm_select(adev, i, k, j, 0);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.me.num_me; ++i) {
+ for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ soc21_grbm_select(adev, i, k, j, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Try to acquire the gfx mutex before access to CP_VMID_RESET */
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ r = gfx_v11_0_request_gfx_index_mutex(adev, true);
+ if (r) {
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+ DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
+ return r;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
+
+ /*
+ * Read the CP_VMID_RESET register three times to give GFX_HQD_ACTIVE
+ * sufficient time to reach 0.
+ */
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+
+ /* release the gfx mutex */
+ r = gfx_v11_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+ if (r) {
+ DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
+ !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ printk("Failed to wait all pipes clean\n");
+ return -EINVAL;
+ }
+
+ /********** trigger soft reset ***********/
+ grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_GFX, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 1);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+ /********** exit soft reset ***********/
+ grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CP, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_GFX, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 0);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
+ WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ printk("Failed to wait CP_VMID_RESET to 0\n");
+ return -EINVAL;
+ }
+
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return gfx_v11_0_cp_resume(adev);
+}
+
+static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ int i, r;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ long tmo = msecs_to_jiffies(1000);
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ return false;
+}
+
+static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* A GFX soft reset also impacts MES, so MES must be resumed afterwards. */
+ return amdgpu_mes_resume(adev);
+}
+
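+/*
+ * Return the 64-bit GPU clock counter.  The upper half is sampled before
+ * and after the lower half, and the lower half is re-read if the upper
+ * half changed in between.
+ */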
+static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+ uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
+
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
+ clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+ } else {
+ preempt_disable();
+ clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
+ }
+ clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
+
+ return clock;
+}
+
+static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v11_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
+
+ adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
+
+ if (adev->gfx.disable_kq) {
+ /* We need one GFX ring temporarily to set up
+ * the clear state.
+ */
+ adev->gfx.num_gfx_rings = 1;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
+
+ gfx_v11_0_set_kiq_pm4_funcs(adev);
+ gfx_v11_0_set_ring_funcs(adev);
+ gfx_v11_0_set_irq_funcs(adev);
+ gfx_v11_0_set_gds_init(adev);
+ gfx_v11_0_set_rlc_funcs(adev);
+ gfx_v11_0_set_mqd_funcs(adev);
+ gfx_v11_0_set_imu_funcs(adev);
+
+ gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v11_0_init_microcode(adev);
+}
+
+static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_cntl;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
+ return REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32);
+}
+
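+/* Request RLC safe mode entry and wait for the RLC to clear the CMD field. */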
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
+{
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
+}
+
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
+ /* It is disabled by HW by default */
+ if (enable) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ } else {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ }
+}
+
+static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
+ adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
+
+ data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
+ data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ } else {
+ /* Program RLC_CGCG_CGLS_CTRL */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ }
+}
+
+static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
+
+ gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
+
+ gfx_v11_0_update_repeater_fgcg(adev, enable);
+
+ gfx_v11_0_update_sram_fgcg(adev, enable);
+
+ gfx_v11_0_update_perf_clk(adev, enable);
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
+static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
+{
+ u32 reg, pre_data, data;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
+ pre_data = RREG32_NO_KIQ(reg);
+ else
+ pre_data = RREG32(reg);
+
+ data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (pre_data != data) {
+ if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
+ WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ } else {
+ WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+ }
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ if (ring
+ && amdgpu_sriov_is_pp_one_vf(adev)
+ && (pre_data != data)
+ && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
+ amdgpu_ring_emit_wreg(ring, reg, data);
+ }
+}
+
+static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v11_0_set_safe_mode,
+ .unset_safe_mode = gfx_v11_0_unset_safe_mode,
+ .init = gfx_v11_0_rlc_init,
+ .get_csb_size = gfx_v11_0_get_csb_size,
+ .get_csb_buffer = gfx_v11_0_get_csb_buffer,
+ .resume = gfx_v11_0_rlc_resume,
+ .stop = gfx_v11_0_rlc_stop,
+ .reset = gfx_v11_0_rlc_reset,
+ .start = gfx_v11_0_rlc_start,
+ .update_spm_vmid = gfx_v11_0_update_spm_vmid,
+};
+
+static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
+
+ /* Program RLC_PG_DELAY_3 for CGPG hysteresis */
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v11_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
+static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ if (!enable)
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ gfx_v11_cntl_pg(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ gfx_v11_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_REPEATER_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_PERF_CLK */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ /* gfx11 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
+}
+
+static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ /* gfx11 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx11 now */
+ }
+}
+
+static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
+ if (vmid && !ring->adev->gfx.rs64_enable)
+ gfx_v11_0_ring_emit_de_meta(ring,
+ !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
+ }
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
+ PACKET3_RELEASE_MEM_GCR_GLM_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * The address should be qword aligned for a 64bit write and dword
+ * aligned if only the low 32bit data is sent (data high is discarded).
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+
+ /* Make sure that we can't skip the SET_Q_MODE packets when the VM
+ * changed in any way.
+ */
+ ring->set_q_mode_offs = 0;
+ ring->set_q_mode_ptr = NULL;
+}
+
+static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+ uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
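+/*
+ * Emit a COND_EXEC packet and return the ring offset of its count dword
+ * so that it can be patched later via amdgpu_ring_patch_cond_exec().
+ */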
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask;
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
+static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
+ u64 shadow_va, u64 csa_va,
+ u64 gds_va, bool init_shadow,
+ int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned int offs, end;
+
+ if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
+ return;
+
+ /*
+ * The logic here isn't easy to understand because we need to keep state
+ * across multiple executions of the function as well as between the
+ * CPU and GPU. The general idea is that the newly written GPU command
+ * has a condition on the previous one and only executed if really
+ * necessary.
+ */
+
+ /*
+ * The dw in the NOP controls whether the next SET_Q_MODE packet should
+ * be executed or not. Reserve 64 bits just to be on the safe side.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
+ offs = ring->wptr & ring->buf_mask;
+
+ /*
+ * We start with skipping the prefix SET_Q_MODE and always executing
+ * the postfix SET_Q_MODE packet. This is changed below with a
+ * WRITE_DATA command when the postfix has executed.
+ */
+ amdgpu_ring_write(ring, shadow_va ? 1 : 0);
+ amdgpu_ring_write(ring, 0);
+
+ if (ring->set_q_mode_offs) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += ring->set_q_mode_offs << 2;
+ end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
+ }
+
+ /*
+ * When the postfix SET_Q_MODE packet executes we need to make sure that the
+ * next prefix SET_Q_MODE packet executes as well.
+ */
+ if (!shadow_va) {
+ uint64_t addr;
+
+ addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+ addr += offs << 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, 0x1);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
+ amdgpu_ring_write(ring, lower_32_bits(shadow_va));
+ amdgpu_ring_write(ring, upper_32_bits(shadow_va));
+ amdgpu_ring_write(ring, lower_32_bits(gds_va));
+ amdgpu_ring_write(ring, upper_32_bits(gds_va));
+ amdgpu_ring_write(ring, lower_32_bits(csa_va));
+ amdgpu_ring_write(ring, upper_32_bits(csa_va));
+ amdgpu_ring_write(ring, shadow_va ?
+ PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
+ amdgpu_ring_write(ring, init_shadow ?
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
+
+ if (ring->set_q_mode_offs)
+ amdgpu_ring_patch_cond_exec(ring, end);
+
+ if (shadow_va) {
+ uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
+
+ /*
+ * If the tokens match try to skip the last postfix SET_Q_MODE
+ * packet to avoid saving/restoring the state all the time.
+ */
+ if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
+ *ring->set_q_mode_ptr = 0;
+
+ ring->set_q_mode_token = token;
+ } else {
+ ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
+ }
+
+ ring->set_q_mode_offs = offs;
+}
+
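+/*
+ * Preempt the jobs on a gfx ring: ask the KIQ to unmap the queue with
+ * PREEMPT_QUEUES_NO_UNMAP and poll the trailing fence to confirm that
+ * the preemption completed.
+ */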
+static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (adev->enable_mes)
+ return -EINVAL;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_de_ib_state de_payload = {0};
+ uint64_t offset, gds_addr, de_payload_gpu_addr;
+ void *de_payload_cpu_addr;
+ int cnt;
+
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+
+ cnt = (sizeof(de_payload) >> 2) + 4 - 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
+}
+
+static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+ bool secure)
+{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+}
+
+static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+}
+
+static void
+gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ break;
+ case 1:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ } else {
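+ /* legacy path: ring_id encodes me in bits [3:2], pipe in [1:0], queue in [6:4] */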
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
+ }
+ }
+}
+
+static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v11_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
+ return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
+
+ return 0;
+}
+
+#if 0
+static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t tmp, target;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
+
+ target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ target += ring->pipe;
+
+ switch (type) {
+ case AMDGPU_CP_KIQ_IRQ_DRIVER0:
+ if (state == AMDGPU_IRQ_STATE_DISABLE) {
+ tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, target, tmp);
+ } else {
+ tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
+
+ tmp = RREG32_SOC15_IP(GC, target);
+ tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, target, tmp);
+ }
+ break;
+ default:
+ BUG(); /* kiq only support GENERIC2_INT now */
+ break;
+ }
+ return 0;
+}
+#endif
+
+static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
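+ /* the GCR bits above invalidate (and, where selected, write back) the GL2,
+ * GLM, GL1, GLV, GLK and GLI caches
+ */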
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully supports it. */
+ dev_warn_once(adev->dev, "The CPFW doesn't support pipe reset yet.\n");
+ return false;
+}
+
+static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
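+ /* the reset should leave the ME at the RS64 firmware start PC; a non-zero
+ * delta here suggests the reset did not take effect
+ */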
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
+ * so the pipe reset status relies on the later gfx ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ if (r) {
+ dev_warn(adev->dev, "reset via MES failed (%d), trying pipe reset\n", r);
+ r = gfx_v11_reset_gfx_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_kgq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kgq\n");
+ return r;
+ }
+
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ if (ring->me == 1) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec1 fw pc: CP_MEC1_INSTR_PNTR */
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe
+ * reset status relies on the compute ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r = 0;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v11_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v11_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "fail to init kcq\n");
+ return r;
+ }
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kcq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_11_0[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "regCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
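+ /* disable GFXOFF while the core GC registers are read */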
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+ for (reg = 0; reg < reg_count; reg++) {
+ if (i &&
+ gc_cp_reg_list_11[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0,
+ regCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_11[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ soc21_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_11[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
+static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
+ .name = "gfx_v11_0",
+ .early_init = gfx_v11_0_early_init,
+ .late_init = gfx_v11_0_late_init,
+ .sw_init = gfx_v11_0_sw_init,
+ .sw_fini = gfx_v11_0_sw_fini,
+ .hw_init = gfx_v11_0_hw_init,
+ .hw_fini = gfx_v11_0_hw_fini,
+ .suspend = gfx_v11_0_suspend,
+ .resume = gfx_v11_0_resume,
+ .is_idle = gfx_v11_0_is_idle,
+ .wait_for_idle = gfx_v11_0_wait_for_idle,
+ .soft_reset = gfx_v11_0_soft_reset,
+ .check_soft_reset = gfx_v11_0_check_soft_reset,
+ .post_soft_reset = gfx_v11_0_post_soft_reset,
+ .set_clockgating_state = gfx_v11_0_set_clockgating_state,
+ .set_powergating_state = gfx_v11_0_set_powergating_state,
+ .get_clockgating_state = gfx_v11_0_get_clockgating_state,
+ .dump_ip_state = gfx_v11_ip_dump,
+ .print_ip_state = gfx_v11_ip_print,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v11_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 247 maximum if 16 IBs */
+ 5 + /* update_spm_vmid */
+ 5 + /* COND_EXEC */
+ 22 + /* SET_Q_PREEMPTION_MODE */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 4 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 22 + /* SET_Q_PREEMPTION_MODE */
+ 8 + 8 + /* FENCE x2 */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v11_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v11_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = gfx_v11_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
+ .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
+ .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v11_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v11_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v11_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 5 + /* update_spm_vmid */
+ 20 + /* gfx_v11_0_ring_emit_gds_switch */
+ 7 + /* gfx_v11_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v11_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v11_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v11_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = gfx_v11_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v11_0_emit_mem_sync,
+ .reset = gfx_v11_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v11_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v11_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v11_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v11_0_ring_emit_gds_switch */
+ 7 + /* gfx_v11_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v11_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v11_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v11_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v11_0_ring_test_ring,
+ .test_ib = gfx_v11_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v11_0_ring_emit_rreg,
+ .emit_wreg = gfx_v11_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
+};
+
+static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
+ .set = gfx_v11_0_set_eop_interrupt_state,
+ .process = gfx_v11_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
+ .set = gfx_v11_0_set_priv_reg_fault_state,
+ .process = gfx_v11_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
+ .set = gfx_v11_0_set_bad_op_fault_state,
+ .process = gfx_v11_0_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
+ .set = gfx_v11_0_set_priv_inst_fault_state,
+ .process = gfx_v11_0_priv_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
+ .process = gfx_v11_0_rlc_gc_fed_irq,
+};
+
+static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
+
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
+
+ adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
+ adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
+}
+
+static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ adev->gfx.imu.mode = MISSION_MODE;
+ else
+ adev->gfx.imu.mode = DEBUG_MODE;
+
+ adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
+}
+
+static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
+}
+
+static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
+{
+ unsigned total_cu = adev->gfx.config.max_cu_per_sh *
+ adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines;
+
+ adev->gds.gds_size = 0x1000;
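+ /* the max compute wave id below assumes 32 wave slots per CU */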
+ adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v11_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v11_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v11_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v11_0_compute_mqd_init;
+}
+
+static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 data, wgp_bitmask;
+
+ data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
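+ /* a WGP is a pair of CUs, so the per-SH WGP count is max_cu_per_sh / 2 */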
+ wgp_bitmask =
+ amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+
+ return (~data) & wgp_bitmask;
+}
+
+static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
+
+static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap;
+ unsigned disable_masks[8 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ counter = 0;
+ gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 8 && j < 2)
+ gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
+
+ /**
+ * GFX11 could support more than 4 SEs, while the bitmap
+ * in cu_info struct is 4x4 and ioctl interface struct
+ * drm_amdgpu_info_device should keep stable.
+ * So we use last two columns of bitmap to store cu mask for
+ * SEs 4 to 7, the layout of the bitmap is as below:
+ * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
+ * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
+ * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
+ * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
+ * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
+ * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
+ * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
+ * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
+ */
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask)
+ counter++;
+
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ }
+ }
+ gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v11_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
new file mode 100644
index 000000000000..157a5c812259
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V11_0_H__
+#define __GFX_V11_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v11_0_ip_block;
+
+int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
new file mode 100644
index 000000000000..999bb3cc88b7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "soc21.h"
+#include "gc/gc_11_0_3_offset.h"
+#include "gc/gc_11_0_3_sh_mask.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "gfx_v11_0.h"
+
+
+static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t rlc_status0 = 0, rlc_status1 = 0;
+ struct ras_common_if *ras_if = NULL;
+ struct ras_dispatch_if ih_data = {
+ .entry = entry,
+ };
+
+ rlc_status0 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_0));
+ rlc_status1 = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_RLCS_FED_STATUS_1));
+
+ if (!rlc_status0 && !rlc_status1) {
+ dev_warn(adev->dev, "RLC_GC_FED irq is generated, but rlc_status0 and rlc_status1 are empty!\n");
+ return 0;
+ }
+
+ /* Use RLC_RLCS_FED_STATUS_0/1 to distinguish FED error block. */
+ if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+ REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR))
+ ras_if = adev->sdma.ras_if;
+ else
+ ras_if = adev->gfx.ras_if;
+
+ if (!ras_if) {
+ dev_err(adev->dev, "Gfx or sdma ras block not initialized, rlc_status0:0x%x.\n",
+ rlc_status0);
+ return -EINVAL;
+ }
+
+ dev_warn(adev->dev, "RLC %s FED IRQ\n", ras_if->name);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev, ras_if->block);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name);
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ /* Workaround: when vmid and pasid are both zero, trigger gpu reset in KGD. */
+ if (entry && (entry->client_id == SOC21_IH_CLIENTID_GFX) &&
+ (entry->src_id == GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT) &&
+ !entry->vmid && !entry->pasid) {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t rlc_status0 = 0;
+
+ rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0);
+
+ if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+ REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR)) {
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+
+ if (con && !amdgpu_ras_is_rma(adev))
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ return 0;
+}
+
+struct amdgpu_gfx_ras gfx_v11_0_3_ras = {
+ .rlc_gc_fed_irq = gfx_v11_0_3_rlc_gc_fed_irq,
+ .poison_consumption_handler = gfx_v11_0_3_poison_consumption_handler,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h
new file mode 100644
index 000000000000..672c7920b3d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V11_0_3_H__
+#define __GFX_V11_0_3_H__
+
+extern struct amdgpu_gfx_ras gfx_v11_0_3_ras;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
new file mode 100644
index 000000000000..9b90b66368c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader cleans LDS, SGPRs and VGPRs. It is the first 64 dwords (256 bytes) of the 192-dword cleaner shader.
+// To turn this shader program on for compilation, change this label to main and rename the lower shader main to main_1.
+
+// Navi3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+shader main
+ asic(GFX11)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024 clks vgpr + 640 clks lds = 1660 clks)
+//
+ S_BARRIER
+
+ //
+ // CLEAR VGPRs
+ //
+ s_mov_b32 m0, 0x00000058 // Loop 96/8=12 times (loop unrolled for performance)
+
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_sub_u32 m0, m0, 8
+ s_cbranch_scc0 label_0005
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 carries the tg_size (first_wave) term, as only COMPUTE_PGM_RSRC2.tg_size_en is set in the ucode
+ s_cbranch_scc0 label_0023 // Clean LDS only if it's the first wave of the ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8 bytes)
+ s_mov_b32 s2, 0x0000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
new file mode 100644
index 000000000000..3218cc04f543
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_11_0_3 */
+static const u32 gfx_11_0_3_cleaner_shader_hex[] = {
+ 0xb0804006, 0xbe8200ff,
+ 0x00000058, 0xbefd0080,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefd0002, 0x80828802,
+ 0xbfa1fff5, 0xbe8200ff,
+ 0x80000000, 0x8b020002,
+ 0xbfa10012, 0xbefe00c1,
+ 0xbeff00c1, 0xd71f0001,
+ 0x0001007f, 0xd7200001,
+ 0x0002027e, 0x16020288,
+ 0xbe8200bf, 0xbefd00c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd7006a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbfa1fff7, 0xbefd00ff,
+ 0x00000068, 0xbe804280,
+ 0xbe814280, 0xbe824280,
+ 0xbe834280, 0x80fd847d,
+ 0xbfa1fffa, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbfb00000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
new file mode 100644
index 000000000000..d01d2712cf57
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -0,0 +1,5793 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "imu_v12_0.h"
+#include "soc24.h"
+#include "nvd.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
+
+#include "soc15.h"
+#include "clearstate_gfx12.h"
+#include "v12_structs.h"
+#include "gfx_v12_0.h"
+#include "nbif_v6_3_1.h"
+#include "mes_v12_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define GFX12_NUM_GFX_RINGS 1
+#define GFX12_MEC_HPD_SIZE 2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
+
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+ SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
+ SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ /* SE status registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+ SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
+};
+
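+/* per-queue compute HQD registers captured by the GFX IP dump */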
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
+ /* compute registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+};
+
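+/* per-queue gfx HQD registers captured by the GFX IP dump */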
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
+ /* gfx queue registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+};
+
+static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_12_0[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x00008000, 0x00008000),
+};
+
+#define DEFAULT_SH_MEM_CONFIG \
+ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
+
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel);
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
+
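+/* build a PM4 SET_RESOURCES packet on the KIQ ring for the given queue mask */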
+static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0);
+}
+
+static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = ring->wptr_gpu_addr;
+ uint32_t me = 0, eng_sel = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_COMPUTE:
+ me = 1;
+ eng_sel = 0;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ me = 0;
+ eng_sel = 4;
+ break;
+ case AMDGPU_RING_TYPE_MES:
+ me = 2;
+ eng_sel = 5;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((me)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+ amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+ return;
+ }
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+ uint16_t pasid,
+ uint32_t flush_type,
+ bool all_hub)
+{
+ gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx_v12_0_kiq_set_resources,
+ .kiq_map_queues = gfx_v12_0_kiq_map_queues,
+ .kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
+ .kiq_query_status = gfx_v12_0_kiq_query_status,
+ .kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+ .invalidate_tlbs_size = 2,
+};
+
+static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs;
+}
+
+static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref,
+ uint32_t mask, uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
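+/* ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring and poll it back */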
+static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(scratch, 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+ gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+ } else {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, scratch -
+ PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ }
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+ return r;
+}
+
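+/* IB test: submit a small IB that writes 0xDEADBEEF to a writeback slot and wait on its fence */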
+static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t *cpu_ptr;
+ long r;
+
+ /* MES KIQ fw doesn't have indirect buffer support for now */
+ if (adev->enable_mes_kiq &&
+ ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ return 0;
+
+ memset(&ib, 0, sizeof(ib));
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
+
+ r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ goto err2;
+ }
+
+ if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+ r = 0;
+ else
+ r = -EINVAL;
+err2:
+ amdgpu_ib_free(&ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
+{
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+ const struct psp_firmware_header_v1_0 *toc_hdr;
+ int err = 0;
+
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", ucode_prefix);
+ if (err)
+ goto out;
+
+ toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+ adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+ adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+ adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+ adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+ le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+ return 0;
+out:
+ amdgpu_ucode_release(&adev->psp.toc_fw);
+ return err;
+}
+
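+/* request the PFP, ME, RLC, MEC (and, when needed, TOC and IMU) firmware images */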
+static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
+{
+ char ucode_prefix[30];
+ int err;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ DRM_DEBUG("\n");
+
+ amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", ucode_prefix);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", ucode_prefix);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", ucode_prefix);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+ if (err)
+ goto out;
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", ucode_prefix);
+ if (err)
+ goto out;
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+ amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
+
+ /* only one MEC for gfx 12 */
+ adev->gfx.mec2_fw = NULL;
+
+ if (adev->gfx.imu.funcs) {
+ if (adev->gfx.imu.funcs->init_microcode) {
+ err = adev->gfx.imu.funcs->init_microcode(adev);
+ if (err)
+ dev_err(adev->dev, "Failed to load imu firmware!\n");
+ }
+ }
+
+out:
+ if (err) {
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ amdgpu_ucode_release(&adev->gfx.mec_fw);
+ }
+
+ return err;
+}
+
+static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ count += 1;
+
+ for (sect = gfx12_cs_data; sect->section != NULL; ++sect) {
+ if (sect->id == SECT_CONTEXT) {
+ for (ext = sect->section; ext->extent != NULL; ++ext)
+ count += 2 + ext->reg_count;
+ } else
+ return 0;
+ }
+
+ return count;
+}
+
+static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
+{
+ u32 count = 0, clustercount = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ count += 1;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ if (sect->id == SECT_CONTEXT) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ clustercount++;
+ buffer[count++] = ext->reg_count;
+ buffer[count++] = ext->reg_index;
+
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ }
+ } else
+ return;
+ }
+
+ buffer[0] = clustercount;
+}
+
+static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+ struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+ reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+ reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+ reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+ reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+ reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+ reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+ reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+ reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+ adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx12_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+ return 0;
+}
+
+static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+static void gfx_v12_0_me_init(struct amdgpu_device *adev)
+{
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ size_t mec_hpd_size;
+
+ bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
+
+ if (mec_hpd_size) {
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
+ gfx_v12_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, mec_hpd_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+ }
+
+ return 0;
+}
+
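+/* read a wave-indexed register via the SQ_IND_INDEX/SQ_IND_DATA pair */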
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
+ uint32_t xcc_id,
+ uint32_t simd, uint32_t wave,
+ uint32_t *dst, int *no_fields)
+{
+ /* in gfx12 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here */
+ WARN_ON(simd != 0);
+
+ /* type 4 wave data */
+ dst[(*no_fields)++] = 4;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE);
+}
+
+static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
+ uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ WARN_ON(simd != 0);
+
+ wave_read_regs(
+ adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+ dst);
+}
+
+static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
+ uint32_t xcc_id, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+ soc24_grbm_select(adev, me, pipe, q, vm);
+}
+
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+ return 0;
+ }
+
+ memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+ return -EINVAL;
+}
+
+static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v12_0_select_se_sh,
+ .read_wave_data = &gfx_v12_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v12_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
+ .select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
+ .update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info,
+};
+
+static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+ int me, int pipe, int queue)
+{
+ int r;
+ struct amdgpu_ring *ring;
+ unsigned int irq_type;
+
+ ring = &adev->gfx.gfx_ring[ring_id];
+
+ ring->me = me;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ if (!ring_id)
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring;
+ unsigned int hw_prio;
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX12_MEC_HPD_SIZE);
+ ring->vm_hub = AMDGPU_GFXHUB(0);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+ hw_prio, NULL);
+ if (r)
+ return r;
+
+ return 0;
+}
+
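+/* layout of the RLC autoload buffer, indexed by firmware ID and filled from the PSP TOC */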
+static struct {
+ SOC24_FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+ unsigned int size_x16;
+} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
+
+#define RLC_TOC_OFFSET_DWUNIT 8
+#define RLC_SIZE_MULTIPLE 1024
+#define RLC_TOC_UMF_SIZE_inM 23ULL
+#define RLC_TOC_FORMAT_API 165ULL
+
+static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+ RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
+
+ while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
+ rlc_autoload_info[ucode->id].id = ucode->id;
+ rlc_autoload_info[ucode->id].offset =
+ ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
+ rlc_autoload_info[ucode->id].size =
+ ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
+ ucode->size * 4;
+ ucode++;
+ }
+}
+
+static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+ uint32_t total_size = 0;
+ SOC24_FIRMWARE_ID id;
+
+ gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+ for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++)
+ total_size += rlc_autoload_info[id].size;
+
+ /* In case the offsets in the rlc toc are aligned/padded, make
+ * sure the buffer still covers the last entry */
+ if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset)
+ total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
+ rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
+ if (total_size < (RLC_TOC_UMF_SIZE_inM << 20))
+ total_size = RLC_TOC_UMF_SIZE_inM << 20;
+
+ return total_size;
+}
+
+static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t total_size;
+
+ total_size = gfx_v12_0_calc_toc_total_size(adev);
+
+ r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ SOC24_FIRMWARE_ID id,
+ const void *fw_data,
+ uint32_t fw_size)
+{
+ uint32_t toc_offset;
+ uint32_t toc_fw_size;
+ char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+
+ if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
+ return;
+
+ toc_offset = rlc_autoload_info[id].offset;
+ toc_fw_size = rlc_autoload_info[id].size;
+
+ if (fw_size == 0)
+ fw_size = toc_fw_size;
+
+ if (fw_size > toc_fw_size)
+ fw_size = toc_fw_size;
+
+ memcpy(ptr + toc_offset, fw_data, fw_size);
+
+ if (fw_size < toc_fw_size)
+ memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
+{
+ void *data;
+ uint32_t size;
+ uint32_t *toc_ptr;
+
+ data = adev->psp.toc.start_addr;
+ size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
+
+ toc_ptr = (uint32_t *)data + size / 4 - 2;
+ *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
+ data, size);
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v2_0 *cpv2_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
+ const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
+ uint16_t version_major, version_minor;
+
+ /* pfp ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK,
+ fw_data, fw_size);
+ /* me ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_ME_P1_STACK,
+ fw_data, fw_size);
+ /* mec ucode */
+ cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ /* instruction */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
+ fw_data, fw_size);
+ /* data */
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cpv2_hdr->data_offset_bytes));
+ fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
+ fw_data, fw_size);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
+ fw_data, fw_size);
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size);
+
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2) {
+ if (version_minor >= 1) {
+ rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
+ fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
+ fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
+ fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
+ fw_data, fw_size);
+ }
+ if (version_minor >= 2) {
+ rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
+ fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
+ fw_data, fw_size);
+ }
+ }
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v3_0 *sdma_hdr;
+
+ sdma_hdr = (const struct sdma_firmware_header_v3_0 *)
+ adev->sdma.instance[0].fw->data;
+ fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
+ le32_to_cpu(sdma_hdr->ucode_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes);
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
+ fw_data, fw_size);
+}
+
+static void
+gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ unsigned fw_size;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ int pipe, ucode_id, data_id;
+
+ for (pipe = 0; pipe < 2; pipe++) {
+ if (pipe == 0) {
+ ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0;
+ data_id = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK;
+ } else {
+ ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1;
+ data_id = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK;
+ }
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw[pipe]->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size);
+
+ fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ gfx_v12_0_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size);
+ }
+}
+
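+/* copy all firmware into the autoload buffer, then boot via the IMU or unhalt the RLC directly */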
+static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size;
+ uint64_t gpu_addr;
+ uint32_t data;
+
+ /* RLC autoload sequence 2: copy ucode */
+ gfx_v12_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_mes_ucode(adev);
+ gfx_v12_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
+
+ rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
+
+ WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
+
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ /* RLC autoload sequence 3: load IMU fw */
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ /* RLC autoload sequence 4 init IMU fw */
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+
+ /* RLC autoload sequence 5 disable gpa mode */
+ gfx_v12_0_disable_gpa_mode(adev);
+ } else {
+ /* unhalt rlc to start autoload without imu */
+ data = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
+ data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
+ data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, data);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
+ }
+
+ return 0;
+}
+
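+/* allocate buffers to snapshot core, compute-queue and gfx-queue registers for the IP dump */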
+static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+ uint32_t *ptr;
+ uint32_t inst;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
+ adev->gfx.ip_dump_core = NULL;
+ } else {
+ adev->gfx.ip_dump_core = ptr;
+ }
+
+ /* Allocate memory for compute queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
+ adev->gfx.ip_dump_compute_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_compute_queues = ptr;
+ }
+
+ /* Allocate memory for gfx queue registers for all the instances */
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
+
+ ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
+ adev->gfx.ip_dump_gfx_queues = NULL;
+ } else {
+ adev->gfx.ip_dump_gfx_queues = ptr;
+ }
+}
+
+static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ int i, j, k, r, ring_id = 0;
+ unsigned num_compute_rings;
+ int xcc_id = 0;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 2;
+ adev->gfx.mec.num_queue_per_pipe = 4;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2780 &&
+ adev->gfx.pfp_fw_version >= 2840 &&
+ adev->gfx.mec_fw_version >= 3050 &&
+ adev->mes.fw_version[0] >= 123) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (adev->gfx.me_fw_version >= 2480 &&
+ adev->gfx.pfp_fw_version >= 2530 &&
+ adev->gfx.mec_fw_version >= 2680 &&
+ adev->mes.fw_version[0] >= 100)
+ adev->gfx.enable_cleaner_shader = true;
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ /* recalculate compute rings to use based on hardware configuration */
+ num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe) / 2;
+ adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+ }
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Bad opcode Event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ &adev->gfx.bad_op_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
+ GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v12_0_me_init(adev);
+
+ r = gfx_v12_0_rlc_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v12_0_mec_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ if (adev->gfx.num_gfx_rings) {
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v12_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev,
+ 0, i, k, j))
+ continue;
+
+ r = gfx_v12_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+ }
+
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if ((adev->gfx.me_fw_version >= 2660) &&
+ (adev->gfx.mec_fw_version >= 2920) &&
+ !amdgpu_sriov_vf(adev) &&
+ !adev->debug_disable_gpu_ring_reset) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (!adev->enable_mes_kiq) {
+ r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
+ if (r) {
+ dev_err(adev->dev, "Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_compute_mqd), 0);
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v12_0_rlc_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_gpu_early_init(adev);
+ if (r)
+ return r;
+
+ gfx_v12_0_alloc_ip_dump(adev);
+
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void gfx_v12_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+}
+
+static void gfx_v12_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+}
+
+static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int i;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev, 0);
+
+ if (!adev->enable_mes_kiq) {
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
+ amdgpu_gfx_kiq_fini(adev, 0);
+ }
+
+ gfx_v12_0_pfp_fini(adev);
+ gfx_v12_0_me_fini(adev);
+ gfx_v12_0_rlc_fini(adev);
+ gfx_v12_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v12_0_rlc_autoload_buffer_fini(adev);
+
+ gfx_v12_0_free_microcode(adev);
+
+ amdgpu_gfx_sysfs_fini(adev);
+
+ kfree(adev->gfx.ip_dump_core);
+ kfree(adev->gfx.ip_dump_compute_queues);
+ kfree(adev->gfx.ip_dump_gfx_queues);
+
+ return 0;
+}
+
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance, int xcc_id)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v12_0_get_sa_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
+
+ gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_CC_GC_SA_UNIT_DISABLE);
+ gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
+ GRBM_CC_GC_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGRBM_GC_USER_SA_UNIT_DISABLE);
+ gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
+ GRBM_GC_USER_SA_UNIT_DISABLE,
+ SA_DISABLE);
+ sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
+}
+
+static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
+ u32 rb_mask;
+
+ gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
+ gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
+ CC_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
+ gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
+ GC_USER_RB_BACKEND_DISABLE,
+ BACKEND_DISABLE);
+ rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
+ adev->gfx.config.max_shader_engines);
+
+ return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
+}
+
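+/* derive the active render backend bitmap from the SA and RB disable registers */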
+static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
+{
+ u32 rb_bitmap_per_sa;
+ u32 rb_bitmap_width_per_sa;
+ u32 max_sa;
+ u32 active_sa_bitmap;
+ u32 global_active_rb_bitmap;
+ u32 active_rb_bitmap = 0;
+ u32 i;
+
+ /* query sa bitmap from SA_UNIT_DISABLE registers */
+ active_sa_bitmap = gfx_v12_0_get_sa_active_bitmap(adev);
+ /* query rb bitmap from RB_BACKEND_DISABLE registers */
+ global_active_rb_bitmap = gfx_v12_0_get_rb_active_bitmap(adev);
+
+ /* generate active rb bitmap according to active sa bitmap */
+ max_sa = adev->gfx.config.max_shader_engines *
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
+ for (i = 0; i < max_sa; i++) {
+ if (active_sa_bitmap & (1 << i))
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
+ }
+
+ active_rb_bitmap &= global_active_rb_bitmap;
+ adev->gfx.config.backend_enable_mask = active_rb_bitmap;
+ adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
+}
+
+#define LDS_APP_BASE 0x1
+#define SCRATCH_APP_BASE 0x2
+
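+/* program default SH_MEM apertures and enable the trap handler for the KFD compute VMIDs */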
+static void gfx_v12_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_bases;
+ uint32_t data;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ SCRATCH_APP_BASE;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ soc24_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
+
+ /* Enable trap for each kfd vmid. */
+ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
+ data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
+ WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v12_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ /* TODO: harvest feature to be added later. */
+}
+
+static void gfx_v12_0_get_tcc_info(struct amdgpu_device *adev)
+{
+}
+
+static void gfx_v12_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v12_0_setup_rb(adev);
+ gfx_v12_0_get_cu_info(adev, &adev->gfx.cu_info);
+ gfx_v12_0_get_tcc_info(adev);
+ adev->gfx.config.pa_sc_tile_steering_override = 0;
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+ soc24_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
+ if (i != 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v12_0_init_compute_vmid(adev);
+}
+
+static u32 gfx_v12_0_get_cpg_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ if (me != 0)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ default:
+ return 0;
+ }
+}
+
+static u32 gfx_v12_0_get_cpc_int_cntl(struct amdgpu_device *adev,
+ int me, int pipe)
+{
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+ if (me != 1)
+ return 0;
+
+ switch (pipe) {
+ case 0:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ case 1:
+ return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ default:
+ return 0;
+ }
+}
+
+static void gfx_v12_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp, cp_int_cntl_reg;
+ int i, j;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
+ }
+ }
+ }
+}
+
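+/*
+ * Build the clear state buffer via the RLC callbacks and program its GPU
+ * address and size into the RLC CSIB registers.
+ */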
+static int gfx_v12_0_init_csb(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+
+ return 0;
+}
+
+static void gfx_v12_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
+}
+
+static void gfx_v12_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v12_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t rlc_pg_cntl;
+
+ rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (!enable) {
+ /* RLC_PG_CNTL[23] = 0 (default)
+ * RLC will wait for handshake acks with SMU
+ * GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ * RLC will not issue any message to SMU
+ * hence no handshake between SMU & RLC
+ * GFXOFF will be disabled
+ */
+ rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ } else {
+ rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
+ }
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
+}
+
+static void gfx_v12_0_rlc_start(struct amdgpu_device *adev)
+{
+ /* TODO: enable the RLC & SMU handshake once the SMU
+ * and GFXOFF features work as expected */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v12_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+static void gfx_v12_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
+}
+
+static void gfx_v12_0_load_rlcg_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+}
+
+static void gfx_v12_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ u32 tmp;
+
+ hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
+
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
+ fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
+ for (i = 0; i < fw_size; i++) {
+ if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
+ msleep(1);
+ WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
+ le32_to_cpup(fw_data++));
+ }
+
+ WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
+
+ tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
+ WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
+}
+
+static int gfx_v12_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ version_major = le16_to_cpu(hdr->header.header_version_major);
+ version_minor = le16_to_cpu(hdr->header.header_version_minor);
+
+ if (version_major == 2) {
+ gfx_v12_0_load_rlcg_microcode(adev);
+ if (amdgpu_dpm == 1) {
+ if (version_minor >= 2)
+ gfx_v12_0_load_rlc_iram_dram_microcode(adev);
+ }
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int gfx_v12_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ gfx_v12_0_init_csb(adev);
+
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v12_0_rlc_enable_srm(adev);
+ } else {
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v12_0_init_csb(adev);
+ return 0;
+ }
+
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v12_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_init_csb(adev);
+
+ adev->gfx.rlc.funcs->start(adev);
+ }
+
+ return 0;
+}
+
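+/*
+ * Program the RS64 PFP/ME/MEC program counter start addresses from the
+ * firmware headers and pulse the corresponding pipe resets so the new
+ * start addresses take effect.
+ */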
+static void gfx_v12_0_config_gfx_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ uint32_t pipe_id, tmp;
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ /* config pfp program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (pfp_hdr->ucode_start_addr_hi << 30) |
+ (pfp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ pfp_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset pfp pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear pfp pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config me program start addr */
+ for (pipe_id = 0; pipe_id < 2; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (me_hdr->ucode_start_addr_hi << 30) |
+ (me_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ me_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset me pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* clear me pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* config mec program start addr */
+ for (pipe_id = 0; pipe_id < 4; pipe_id++) {
+ soc24_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ mec_hdr->ucode_start_addr_lo >> 2 |
+ mec_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ mec_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+
+ /* reset mec pipe */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+
+ /* clear mec pipe reset */
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
+}
+
+static void gfx_v12_0_set_pfp_ucode_start_addr(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *cp_hdr;
+ unsigned pipe_id, tmp;
+
+ cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
+ (cp_hdr->ucode_start_addr_hi << 30) |
+ (cp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
+ cp_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_PFP_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear pfp pipe reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v12_0_set_me_ucode_start_addr(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *cp_hdr;
+ unsigned pipe_id, tmp;
+
+ cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
+ (cp_hdr->ucode_start_addr_hi << 30) |
+ (cp_hdr->ucode_start_addr_lo >> 2));
+ WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
+ cp_hdr->ucode_start_addr_hi >> 2);
+
+ /*
+ * Program CP_ME_CNTL to reset given PIPE to take
+ * effect of CP_ME_PRGRM_CNTR_START.
+ */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ /* Clear me pipe reset bit. */
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v12_0_set_mec_ucode_start_addr(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *cp_hdr;
+ unsigned pipe_id;
+
+ cp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) {
+ soc24_grbm_select(adev, 1, pipe_id, 0, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
+ cp_hdr->ucode_start_addr_lo >> 2 |
+ cp_hdr->ucode_start_addr_hi << 30);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
+ cp_hdr->ucode_start_addr_hi >> 2);
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
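+/*
+ * Poll CP_STAT and RLC_RLCS_BOOTLOAD_STATUS until the RLC-loaded GC ucode
+ * reports bootload complete; for backdoor autoload, also program the
+ * PFP/ME/MEC ucode start addresses afterwards.
+ */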
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+ uint32_t cp_status;
+ uint32_t bootload_status;
+ int i;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
+ bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+ if ((cp_status == 0) &&
+ (REG_GET_FIELD(bootload_status,
+ RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+ break;
+ }
+ udelay(1);
+ if (amdgpu_emu_mode)
+ msleep(10);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ gfx_v12_0_set_pfp_ucode_start_addr(adev);
+ gfx_v12_0_set_me_ucode_start_addr(adev);
+ gfx_v12_0_set_mec_ucode_start_addr(adev);
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
+}
+
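+/*
+ * Copy the RS64 PFP instruction and data images into VRAM buffers, point
+ * the instruction/data cache base registers at them, then invalidate and
+ * prime the caches before programming the ucode start address.
+ */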
+static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *pfp_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
+ gfx_v12_0_pfp_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.pfp.pfp_fw_data_obj,
+ &adev->gfx.pfp.pfp_fw_data_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
+ gfx_v12_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_PFP_IC_BASE registers
+ * forces invalidation of the PFP L1 I$. Wait for the
+ * invalidation to complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
+ /* Wait for the instruction cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
+ lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ gfx_v12_0_set_pfp_ucode_start_addr(adev);
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v2_0 *me_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ unsigned i, pipe_id, fw_ucode_size, fw_data_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ /* instruction */
+ fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
+ /* data */
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
+
+ /* 64kb align */
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
+ gfx_v12_0_me_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev, fw_data_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.me.me_fw_data_obj,
+ &adev->gfx.me.me_fw_data_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
+ gfx_v12_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
+ memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
+
+ if (amdgpu_emu_mode == 1)
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
+ lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
+
+ /*
+ * Programming any of the CP_ME_IC_BASE registers
+ * forces invalidation of the ME L1 I$. Wait for the
+ * invalidation to complete
+ */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Prime the instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for the instruction cache to be primed */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ ICACHE_PRIMED))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to prime instruction cache\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->srbm_mutex);
+ for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
+ soc24_grbm_select(adev, 0, pipe_id, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
+ lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
+ upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
+
+ /* Invalidate the data caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
+
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
+ return -EINVAL;
+ }
+
+ gfx_v12_0_set_me_ucode_start_addr(adev);
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
+ return -EINVAL;
+
+ gfx_v12_0_cp_gfx_enable(adev, false);
+
+ r = gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
+ return r;
+ }
+
+ r = gfx_v12_0_cp_gfx_load_me_microcode_rs64(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load me fw\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ /* init the CP */
+ WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
+ adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v12_0_cp_gfx_enable(adev, true);
+
+ return 0;
+}
+
+static void gfx_v12_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+ CP_PIPE_ID pipe)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
+
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+}
+
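+/*
+ * Program the gfx ring doorbell control (offset/enable) and the doorbell
+ * range registers for the given ring.
+ */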
+static void gfx_v12_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
+
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address whether it's enabled or not */
+ rptr_addr = ring->rptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
+
+ gfx_v12_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Switch to pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v12_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v12_0_cp_gfx_start(adev);
+ return 0;
+}
+
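+/* Halt or activate the MEC pipes and mark the KIQ ring scheduler accordingly. */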
+static void gfx_v12_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 data;
+
+ data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
+ enable ? 0 : 1);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
+ enable ? 1 : 0);
+ data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
+ enable ? 0 : 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
+
+ adev->gfx.kiq[0].ring.sched.ready = enable;
+
+ udelay(50);
+}
+
+static int gfx_v12_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v2_0 *mec_hdr;
+ const __le32 *fw_ucode, *fw_data;
+ u32 tmp, fw_ucode_size, fw_data_size;
+ u32 i, usec_timeout = 50000; /* Wait for 50 ms */
+ u32 *fw_ucode_ptr, *fw_data_ptr;
+ int r;
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v12_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->ucode_offset_bytes));
+ fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->data_offset_bytes));
+ fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw_ucode_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v12_0_mec_fini(adev);
+ return r;
+ }
+
+ r = amdgpu_bo_create_reserved(adev,
+ ALIGN(fw_data_size, 64 * 1024) *
+ adev->gfx.mec.num_pipe_per_mec,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.mec.mec_fw_data_obj,
+ &adev->gfx.mec.mec_fw_data_gpu_addr,
+ (void **)&fw_data_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
+ gfx_v12_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ memcpy(fw_data_ptr + i * ALIGN(fw_data_size, 64 * 1024) / 4, fw_data, fw_data_size);
+ }
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
+ soc24_grbm_select(adev, 1, i, 0, 0);
+
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
+ lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+ i * ALIGN(fw_data_size, 64 * 1024)));
+ WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
+ i * ALIGN(fw_data_size, 64 * 1024)));
+
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
+ lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* Trigger an invalidation of the MEC data cache */
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
+ INVALIDATE_DCACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ gfx_v12_0_set_mec_ucode_start_addr(adev);
+
+ return 0;
+}
+
+static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
+}
+
+static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
+{
+ /* set graphics engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.gfx_ring0 * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
+
+ /* set compute engine doorbell range */
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+}
+
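+/*
+ * Fill in the gfx MQD (memory queue descriptor) from the queue properties;
+ * the CP reads these values when the queue is mapped.
+ */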
+static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v12_gfx_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority */
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+
+ /* set up time quantum */
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up gfx hqd base. this is similar to CP_RB_BASE */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+ if (!prop->kernel_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
+
+ /* activate the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
+static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ *ring->wptr_cpu_addr = 0;
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ r = gfx_v12_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_gfx_enable_kgq(adev, 0);
+ if (r)
+ return r;
+
+ return gfx_v12_0_cp_gfx_start(adev);
+}
+
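+/*
+ * Fill in the compute MQD (EOP buffer, doorbell, ring base/size, priorities)
+ * from the queue properties.
+ */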
+static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
+ struct amdgpu_mqd_prop *prop)
+{
+ struct v12_compute_mqd *mqd = m;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000007;
+
+ eop_base_addr = prop->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+
+ if (prop->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = regCP_MQD_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+ hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(prop->queue_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ if (prop->kernel_queue) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ }
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = prop->rptr_gpu_addr;
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = prop->wptr_gpu_addr;
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (prop->use_doorbell) {
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, prop->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* set static priority for a compute queue/ring */
+ mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
+ mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
+
+ mqd->cp_hqd_active = prop->hqd_active;
+
+ /* set UQ fence address */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
+ return 0;
+}
+
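+/* Program the KIQ HQD registers directly from the MQD contents. */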
+static int gfx_v12_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* inactivate the queue */
+ if (amdgpu_sriov_vf(adev))
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
+
+ /* disable wptr polling */
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v12_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
+
+ gfx_v12_0_kiq_setting(ring);
+
+ if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v12_0_kiq_init_register(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+ amdgpu_ring_clear_ring(ring);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ gfx_v12_0_kiq_init_register(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v12_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ amdgpu_ring_init_mqd(ring);
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else {
+ /* restore MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev)
+{
+ gfx_v12_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
+ adev->gfx.kiq[0].ring.sched.ready = true;
+ return 0;
+}
+
+static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ if (!amdgpu_async_gfx_ring)
+ gfx_v12_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ r = gfx_v12_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
+ if (r)
+ return r;
+ }
+
+ return amdgpu_gfx_enable_kcq(adev, 0);
+}
+
+static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v12_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v12_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v12_0_cp_compute_load_microcode_rs64(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_cp_set_doorbell_range(adev);
+
+ if (amdgpu_async_gfx_ring) {
+ gfx_v12_0_cp_compute_enable(adev, true);
+ gfx_v12_0_cp_gfx_enable(adev, true);
+ }
+
+ if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
+ r = amdgpu_mes_kiq_hw_init(adev);
+ else
+ r = gfx_v12_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v12_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v12_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v12_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v12_0_cp_gfx_enable(adev, enable);
+ gfx_v12_0_cp_compute_enable(adev, enable);
+}
+
+static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+
+ r = adev->gfxhub.funcs->gart_enable(adev);
+ if (r)
+ return r;
+
+ amdgpu_device_flush_hdp(adev, NULL);
+
+ value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
+
+ adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+ /* TODO: investigate why this and the hdp flush above are needed;
+ * are we missing a flush somewhere else? */
+ adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
+
+ return 0;
+}
+
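+/*
+ * Read GB_ADDR_CONFIG and decode the pipe/RB/SE topology fields into
+ * adev->gfx.config.
+ */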
+static int get_gb_addr_config(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
+ if (gb_addr_config == 0)
+ return -EINVAL;
+
+ adev->gfx.config.gb_addr_config_fields.num_pkrs =
+ 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
+
+ return 0;
+}
+
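+/*
+ * Set the GPA override bits in the CPC/CPG PSP debug registers to disable
+ * GPA mode.
+ */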
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
+ data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
+
+ data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
+ data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
+ WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
+}
+
+static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_12_0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0));
+
+ if (adev->rev_id == 0)
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_12_0_rev0,
+ (const u32)ARRAY_SIZE(golden_settings_gc_12_0_rev0));
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ /* RLC autoload sequence 1: Program rlc ram */
+ if (adev->gfx.imu.funcs->program_rlc_ram)
+ adev->gfx.imu.funcs->program_rlc_ram(adev);
+ }
+ /* rlc autoload firmware */
+ r = gfx_v12_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ } else {
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
+ if (adev->gfx.imu.funcs->load_microcode)
+ adev->gfx.imu.funcs->load_microcode(adev);
+ if (adev->gfx.imu.funcs->setup_imu)
+ adev->gfx.imu.funcs->setup_imu(adev);
+ if (adev->gfx.imu.funcs->start_imu)
+ adev->gfx.imu.funcs->start_imu(adev);
+ }
+
+ /* disable gpa mode in backdoor loading */
+ gfx_v12_0_disable_gpa_mode(adev);
+ }
+ }
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ r = gfx_v12_0_wait_for_rlc_autoload_complete(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
+ return r;
+ }
+ }
+
+ if (!amdgpu_emu_mode)
+ gfx_v12_0_init_golden_registers(adev);
+
+ adev->gfx.is_poweron = true;
+
+ if (get_gb_addr_config(adev))
+ DRM_WARN("Invalid gb_addr_config !\n");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
+ gfx_v12_0_config_gfx_rs64(adev);
+
+ r = gfx_v12_0_gfxhub_enable(adev);
+ if (r)
+ return r;
+
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT ||
+ adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) &&
+ (amdgpu_dpm == 1)) {
+ /*
+ * For gfx 12, RLC firmware loading relies on the SMU firmware being
+ * loaded first, so for the direct loading type the SMC ucode has to
+ * be loaded here before the RLC.
+ */
+ r = amdgpu_pm_load_smu_firmware(adev, NULL);
+ if (r)
+ return r;
+ }
+
+ gfx_v12_0_constants_init(adev);
+
+ if (adev->nbio.funcs->gc_doorbell_init)
+ adev->nbio.funcs->gc_doorbell_init(adev);
+
+ r = gfx_v12_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * golden register init and RLC resume may override some registers,
+ * so reconfigure them here
+ */
+ gfx_v12_0_tcp_harvest(adev);
+
+ r = gfx_v12_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
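+/*
+ * Enable or disable the EOP interrupts used by user queues on every gfx
+ * and compute pipe.
+ */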
+static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t tmp;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v12_0_set_userq_eop_interrupts(adev, false);
+
+ if (!adev->no_hw_access) {
+ if (amdgpu_async_gfx_ring) {
+ if (amdgpu_gfx_disable_kgq(adev, 0))
+ DRM_ERROR("KGQ disable failed\n");
+ }
+
+ if (amdgpu_gfx_disable_kcq(adev, 0))
+ DRM_ERROR("KCQ disable failed\n");
+
+ amdgpu_mes_kiq_hw_fini(adev);
+ }
+
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v12_0_cp_gfx_enable(adev, false);
+ /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
+ tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+
+ return 0;
+ }
+ gfx_v12_0_cp_enable(adev, false);
+ gfx_v12_0_enable_gui_idle_interrupt(adev, false);
+
+ adev->gfxhub.funcs->gart_disable(adev);
+
+ adev->gfx.is_poweron = false;
+
+ return 0;
+}
+
+static int gfx_v12_0_suspend(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v12_0_hw_fini(ip_block);
+}
+
+static int gfx_v12_0_resume(struct amdgpu_ip_block *ip_block)
+{
+ return gfx_v12_0_hw_init(ip_block);
+}
+
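+ /* The GFX core is idle when the GUI_ACTIVE bit in GRBM_STATUS is clear */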
+static bool gfx_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock = 0;
+
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_gpu_clock_counter)
+ clock = adev->smuio.funcs->get_gpu_clock_counter(adev);
+ else
+ dev_warn(adev->dev, "query gpu clock counter is not supported\n");
+
+ return clock;
+}
+
+static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
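+ /* amdgpu_user_queue: -1/0 = kernel queues only (default), 1 = kernel and user queues, 2 = user queues only */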
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
+
+ adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
+
+ if (adev->gfx.disable_kq) {
+ adev->gfx.num_gfx_rings = 0;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
+
+ gfx_v12_0_set_kiq_pm4_funcs(adev);
+ gfx_v12_0_set_ring_funcs(adev);
+ gfx_v12_0_set_irq_funcs(adev);
+ gfx_v12_0_set_rlc_funcs(adev);
+ gfx_v12_0_set_mqd_funcs(adev);
+ gfx_v12_0_set_imu_funcs(adev);
+
+ gfx_v12_0_init_rlcg_reg_access_ctrl(adev);
+
+ return gfx_v12_0_init_microcode(adev);
+}
+
+static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
+ if (r)
+ return r;
+
+ r = gfx_v12_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static bool gfx_v12_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_cntl;
+
+ /* callers use this to skip RLC handling when the RLC is not enabled */
+ rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
+ return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
+}
+
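+ /* Request RLC safe mode and poll until the RLC acknowledges the command */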
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
+ RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
+}
+
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
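+ /* Program the VMID used by RLC SPM for its memory accesses */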
+static void gfx_v12_0_update_spm_vmid(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ u32 reg, data;
+
+ reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ data = RREG32_NO_KIQ(reg);
+ else
+ data = RREG32(reg);
+
+ data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ if (amdgpu_sriov_is_pp_one_vf(adev))
+ WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
+ else
+ WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
+
+ if (ring
+ && amdgpu_sriov_is_pp_one_vf(adev)
+ && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
+ uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
+ amdgpu_ring_emit_wreg(ring, reg, data);
+ }
+}
+
+static const struct amdgpu_rlc_funcs gfx_v12_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v12_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v12_0_set_safe_mode,
+ .unset_safe_mode = gfx_v12_0_unset_safe_mode,
+ .init = gfx_v12_0_rlc_init,
+ .get_csb_size = gfx_v12_0_get_csb_size,
+ .get_csb_buffer = gfx_v12_0_get_csb_buffer,
+ .resume = gfx_v12_0_rlc_resume,
+ .stop = gfx_v12_0_rlc_stop,
+ .reset = gfx_v12_0_rlc_reset,
+ .start = gfx_v12_0_rlc_start,
+ .update_spm_vmid = gfx_v12_0_update_spm_vmid,
+};
+
+#if 0
+static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO */
+}
+
+static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ /* TODO */
+}
+#endif
+
+static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_PG_STATE_GATE);
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS)))
+ return;
+
+ if (enable) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ /* unset CGCG override */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
+ adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
+ /* update CGCG override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
+ data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ }
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
+ data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ }
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
+
+ data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
+ data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
+
+ data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
+
+ /* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
+ } else {
+ /* Program RLC_CGCG_CGLS_CTRL */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
+
+ /* Program RLC_CGCG_CGLS_CTRL_3D */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
+ }
+}
+
+static void gfx_v12_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
+ return;
+
+ /* It is disabled by HW by default */
+ if (enable) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ } else {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+ }
+ }
+}
+
+static void gfx_v12_0_update_repeater_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK);
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static void gfx_v12_0_update_sram_fgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+ return;
+
+ def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+
+ if (enable)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
+}
+
+static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ gfx_v12_0_update_coarse_grain_clock_gating(adev, enable);
+
+ gfx_v12_0_update_medium_grain_clock_gating(adev, enable);
+
+ gfx_v12_0_update_repeater_fgcg(adev, enable);
+
+ gfx_v12_0_update_sram_fgcg(adev, enable);
+
+ gfx_v12_0_update_perf_clk(adev, enable);
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v12_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+
+ return 0;
+}
+
+static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ gfx_v12_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int data;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_REPEATER_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_FGCG */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_FGCG;
+
+ /* AMD_CG_SUPPORT_GFX_PERF_CLK */
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v12_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ /* gfx12 is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
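+ /* With doorbells the wptr shadow in system memory is authoritative, otherwise read the CP_RB0_WPTR registers */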
+static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
+}
+
+static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ /* gfx12 hardware is 32bit rptr */
+ return *(uint32_t *)ring->rptr_cpu_addr;
+}
+
+static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx12 now */
+ }
+}
+
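+ /* Flush HDP through the NBIO request/done registers; the ref/mask picks the CP client (PFP for gfx, MEC pipe for compute) */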
+static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v12_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio.funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio.funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * the address must be qword aligned for a 64bit write and dword
+ * aligned when only the low 32 bits of data are sent (data high
+ * is discarded)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
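+ /* Wait on the ring's fence memory until the last synced sequence number has signalled */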
+static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v12_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint8_t dst_sel)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static void gfx_v12_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v12_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
+ uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static unsigned gfx_v12_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+ uint64_t addr)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ /* discard following DWs if *cond_exec_gpu_addr==0 */
+ amdgpu_ring_write(ring, 0);
+ ret = ring->wptr & ring->buf_mask;
+ /* patch dummy value later */
+ amdgpu_ring_write(ring, 0);
+
+ return ret;
+}
+
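+ /* Preempt the gfx ring via the KIQ: unmap the queue without removing it and poll the trailing fence */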
+static int gfx_v12_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ unsigned long flags;
+
+ if (adev->enable_mes)
+ return -EINVAL;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ return -ENOMEM;
+ }
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
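+ /* Emit FRAME_CONTROL to mark the start or end of a frame, optionally flagged as TMZ (secure) */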
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring,
+ bool start,
+ bool secure)
+{
+ uint32_t v = secure ? FRAME_TMZ : 0;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+}
+
+static void gfx_v12_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t reg_val_offs)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ reg_val_offs * 4));
+}
+
+static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v12_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v12_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ gfx_v12_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+}
+
+static void
+gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v12_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 0);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ GENERIC0_INT_ENABLE, 1);
+ WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v12_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v12_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v12_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
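+ /* EOP interrupt handler: user queue doorbell events go to the userq fence driver, kernel rings get normal fence processing */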
+static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ DRM_DEBUG("IH: CP EOP\n");
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ } else {
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead
+ * of per queue.
+ */
+ if ((ring->me == me_id) &&
+ (ring->pipe == pipe_id) &&
+ (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_set_bad_op_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ /* MECs start at 1 */
+ cp_int_cntl_reg = gfx_v12_0_get_cpc_int_cntl(adev, i + 1, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ OPCODE_ERROR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v12_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 cp_int_cntl_reg, cp_int_cntl;
+ int i, j;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ cp_int_cntl_reg = gfx_v12_0_get_cpg_int_cntl(adev, i, j);
+
+ if (cp_int_cntl_reg) {
+ cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
+ }
+ }
+}
+
+static int gfx_v12_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v12_0_bad_op_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal opcode in command stream \n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v12_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v12_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ const unsigned int gcr_cntl =
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
+ PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
+
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
+ amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
+ amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+}
+
+static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by the remaining NOPs one at a time */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nop - 1 NOP packets */
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
+}
+
+static void gfx_v12_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
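+ /* Print the previously dumped register state (core, compute queue and gfx queue lists) to the DRM printer */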
+static void gfx_v12_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_12_0[i].reg_name,
+ adev->gfx.ip_dump_core[i]);
+
+ /* print compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.mec.num_mec,
+ adev->gfx.mec.num_pipe_per_mec,
+ adev->gfx.mec.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_12[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+
+ /* print gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+ adev->gfx.me.num_me,
+ adev->gfx.me.num_pipe_per_me,
+ adev->gfx.me.num_queue_per_pipe);
+
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+ for (reg = 0; reg < reg_count; reg++) {
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_gfx_queue_reg_list_12[reg].reg_name,
+ adev->gfx.ip_dump_gfx_queues[index + reg]);
+ }
+ index += reg_count;
+ }
+ }
+ }
+}
+
+static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ uint32_t i, j, k, reg, index = 0;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+ if (!adev->gfx.ip_dump_core)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump compute queue registers for all instances */
+ if (!adev->gfx.ip_dump_compute_queues)
+ return;
+
+ reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+ for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+ for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+ /* ME0 is for GFX so start from 1 for CP */
+ soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_12[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+
+ /* dump gfx queue registers for all instances */
+ if (!adev->gfx.ip_dump_gfx_queues)
+ return;
+
+ index = 0;
+ reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+ amdgpu_gfx_off_ctrl(adev, false);
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+ for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+ soc24_grbm_select(adev, i, j, k, 0);
+
+ for (reg = 0; reg < reg_count; reg++) {
+ adev->gfx.ip_dump_gfx_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_gfx_queue_reg_list_12[reg]));
+ }
+ index += reg_count;
+ }
+ }
+ }
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
+static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully supports it. */
+ dev_warn_once(adev->dev, "The CPFW doesn't support pipe reset yet.\n");
+ return false;
+}
+
+static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* Sometimes the ME start pc counter can't cache correctly, so the
+ * PC check only as a reference and pipe reset result rely on the
+ * later ring test.
+ */
+ return 0;
+}
+
+static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
+ if (r) {
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v12_reset_gfx_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_kgq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kgq\n");
+ return r;
+ }
+
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kgq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r = 0;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ /* No F32 MEC instruction pointer register was found; assume the
+ * driver never runs the MEC in F32 mode.
+ */
+ }
+
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* Need the ring test to verify the pipe reset result.*/
+ return 0;
+}
+
+static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
+ unsigned int vmid,
+ struct amdgpu_fence *timedout_fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ amdgpu_ring_reset_helper_begin(ring, timedout_fence);
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v12_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v12_0_kcq_init_queue(ring, true);
+ if (r) {
+ dev_err(adev->dev, "failed to init kcq\n");
+ return r;
+ }
+ r = amdgpu_mes_map_legacy_queue(adev, ring);
+ if (r) {
+ dev_err(adev->dev, "failed to remap kcq\n");
+ return r;
+ }
+
+ return amdgpu_ring_reset_helper_end(ring, timedout_fence);
+}
+
+static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v12_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
+static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
+ .name = "gfx_v12_0",
+ .early_init = gfx_v12_0_early_init,
+ .late_init = gfx_v12_0_late_init,
+ .sw_init = gfx_v12_0_sw_init,
+ .sw_fini = gfx_v12_0_sw_fini,
+ .hw_init = gfx_v12_0_hw_init,
+ .hw_fini = gfx_v12_0_hw_fini,
+ .suspend = gfx_v12_0_suspend,
+ .resume = gfx_v12_0_resume,
+ .is_idle = gfx_v12_0_is_idle,
+ .wait_for_idle = gfx_v12_0_wait_for_idle,
+ .set_clockgating_state = gfx_v12_0_set_clockgating_state,
+ .set_powergating_state = gfx_v12_0_set_powergating_state,
+ .get_clockgating_state = gfx_v12_0_get_clockgating_state,
+ .dump_ip_state = gfx_v12_ip_dump,
+ .print_ip_state = gfx_v12_ip_print,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .secure_submission_supported = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v12_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v12_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v12_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v12_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = gfx_v12_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v12_0_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v12_0_ring_emit_init_cond_exec,
+ .preempt_ib = gfx_v12_0_ring_preempt_ib,
+ .emit_frame_cntl = gfx_v12_0_ring_emit_frame_cntl,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_0_emit_mem_sync,
+ .reset = gfx_v12_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_0_ring_emit_vm_flush */
+ 8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
+ .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v12_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v12_0_ring_emit_vm_flush,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = gfx_v12_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_mem_sync = gfx_v12_0_emit_mem_sync,
+ .reset = gfx_v12_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .get_rptr = gfx_v12_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v12_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v12_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 7 + /* gfx_v12_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v12_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v12_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v12_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v12_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v12_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v12_0_ring_test_ring,
+ .test_ib = gfx_v12_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v12_0_ring_emit_rreg,
+ .emit_wreg = gfx_v12_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait,
+ .emit_hdp_flush = gfx_v12_0_ring_emit_hdp_flush,
+};
+
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq[0].ring.funcs = &gfx_v12_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v12_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v12_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_eop_irq_funcs = {
+ .set = gfx_v12_0_set_eop_interrupt_state,
+ .process = gfx_v12_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_reg_irq_funcs = {
+ .set = gfx_v12_0_set_priv_reg_fault_state,
+ .process = gfx_v12_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_bad_op_irq_funcs = {
+ .set = gfx_v12_0_set_bad_op_fault_state,
+ .process = gfx_v12_0_bad_op_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v12_0_priv_inst_irq_funcs = {
+ .set = gfx_v12_0_set_priv_inst_fault_state,
+ .process = gfx_v12_0_priv_inst_irq,
+};
+
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v12_0_eop_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v12_0_priv_reg_irq_funcs;
+
+ adev->gfx.bad_op_irq.num_types = 1;
+ adev->gfx.bad_op_irq.funcs = &gfx_v12_0_bad_op_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v12_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev)
+{
+ if (adev->flags & AMD_IS_APU)
+ adev->gfx.imu.mode = MISSION_MODE;
+ else
+ adev->gfx.imu.mode = DEBUG_MODE;
+
+ adev->gfx.imu.funcs = &gfx_v12_0_imu_funcs;
+}
+
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.rlc.funcs = &gfx_v12_0_rlc_funcs;
+}
+
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+ /* set gfx eng mqd */
+ adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
+ sizeof(struct v12_gfx_mqd);
+ adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
+ gfx_v12_0_gfx_mqd_init;
+ /* set compute eng mqd */
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
+ sizeof(struct v12_compute_mqd);
+ adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
+ gfx_v12_0_compute_mqd_init;
+}
+
+static void gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
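+ /* Combine the HW and user inactive WGP masks and invert to get the active WGP bitmap for this SH */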
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 data, wgp_bitmask;
+ data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ wgp_bitmask =
+ amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+
+ return (~data) & wgp_bitmask;
+}
+
+static u32 gfx_v12_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v12_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
+
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap;
+ unsigned disable_masks[8 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ bitmap = i * adev->gfx.config.max_sh_per_se + j;
+ if (!((gfx_v12_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
+ continue;
+ mask = 1;
+ counter = 0;
+ gfx_v12_0_select_se_sh(adev, i, j, 0xffffffff, 0);
+ if (i < 8 && j < 2)
+ gfx_v12_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v12_0_get_cu_active_bitmap_per_sh(adev);
+
+ /*
+ * GFX12 can support more than 4 SEs, while the bitmap
+ * in the cu_info struct is 4x4 and the ioctl interface struct
+ * drm_amdgpu_info_device must stay stable.
+ * So we use the last two columns of the bitmap to store the CU mask for
+ * SEs 4 to 7; the layout of the bitmap is as below:
+ * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
+ * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
+ * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
+ * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
+ * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
+ * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
+ * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
+ * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
+ */
+ cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask)
+ counter++;
+
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ }
+ }
+ gfx_v12_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
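
The index expression bitmap[0][i % 4][j + (i / 4) * 2] implements the folding described in the comment: SEs 0-3 land in columns 0-1 and SEs 4-7 reuse rows 0-3 in columns 2-3. A quick standalone check that prints the mapping for all eight SEs and both SHs:

#include <stdio.h>

int main(void)
{
    for (int i = 0; i < 8; i++) {         /* shader engine */
        for (int j = 0; j < 2; j++) {     /* shader array  */
            int row = i % 4;
            int col = j + (i / 4) * 2;
            printf("SE%d SH%d -> bitmap[%d][%d]\n", i, j, row, col);
        }
    }
    return 0;
}

For example, SE5/SH1 lands in bitmap[1][3], matching the layout table in the comment.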
+
+const struct amdgpu_ip_block_version gfx_v12_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 12,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v12_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
new file mode 100644
index 000000000000..f7184b2dc4e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V12_0_H__
+#define __GFX_V12_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block;
+
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
new file mode 100644
index 000000000000..80565392313f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -0,0 +1,3602 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_ucode.h"
+#include "clearstate_si.h"
+#include "si.h"
+#include "sid.h"
+
+#include "bif/bif_3_0_d.h"
+#include "bif/bif_3_0_sh_mask.h"
+
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
+
+#include "gca/gfx_6_0_d.h"
+#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
+#include "gmc/gmc_6_0_d.h"
+#include "gmc/gmc_6_0_sh_mask.h"
+
+#include "dce/dce_6_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+
+#include "si_enums.h"
+
+#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
+#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
+#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
+
+#define GFX6_NUM_GFX_RINGS 1
+#define GFX6_NUM_COMPUTE_RINGS 2
+
+static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev);
+
+MODULE_FIRMWARE("amdgpu/tahiti_pfp.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_me.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_ce.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/pitcairn_pfp.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_me.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_ce.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/verde_pfp.bin");
+MODULE_FIRMWARE("amdgpu/verde_me.bin");
+MODULE_FIRMWARE("amdgpu/verde_ce.bin");
+MODULE_FIRMWARE("amdgpu/verde_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/oland_pfp.bin");
+MODULE_FIRMWARE("amdgpu/oland_me.bin");
+MODULE_FIRMWARE("amdgpu/oland_ce.bin");
+MODULE_FIRMWARE("amdgpu/oland_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/hainan_pfp.bin");
+MODULE_FIRMWARE("amdgpu/hainan_me.bin");
+MODULE_FIRMWARE("amdgpu/hainan_ce.bin");
+MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");
+
+static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
+static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer);
+//static void gfx_v6_0_init_cp_pg_table(struct amdgpu_device *adev);
+static void gfx_v6_0_init_pg(struct amdgpu_device *adev);
+
+#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
+#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
+#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
+#define MICRO_TILE_MODE(x) ((x) << 0)
+#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
+#define BANK_WIDTH(x) ((x) << 14)
+#define BANK_HEIGHT(x) ((x) << 16)
+#define MACRO_TILE_ASPECT(x) ((x) << 18)
+#define NUM_BANKS(x) ((x) << 20)
+
+static const u32 verde_rlc_save_restore_register_list[] =
+{
+ (0x8000 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x98f4 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0xe80 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0xe80 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x89bc >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x8c1c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x98f0 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xe7c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9148 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9148 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9150 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x897c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8d8c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac54 >> 2),
+ 0x00000000,
+ 0x3,
+ (0x9c00 << 16) | (0x98f8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9910 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9914 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9918 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x991c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9920 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9924 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9928 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x992c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9930 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9934 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9938 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x993c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9940 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9944 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9948 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x994c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9950 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9954 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9958 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x995c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9960 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9964 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9968 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x996c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9970 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9974 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9978 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x997c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9980 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9984 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9988 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x998c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c00 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c04 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c08 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9b7c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0xe84 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0xe84 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x89c0 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x914c >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x914c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x8c20 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9354 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9354 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9060 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9364 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9100 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x913c >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e0 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e4 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e0 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e4 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x90e8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8bcc >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8b24 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88c4 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e50 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8c0c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e58 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8e5c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9508 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x950c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9494 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac0c >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac10 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xae00 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0xac08 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88d4 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88c8 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x88cc >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x89b0 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8b10 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x8a14 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9830 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9834 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9838 >> 2),
+ 0x00000000,
+ (0x9c00 << 16) | (0x9a10 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8000 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8001 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8001 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8040 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ (0x8041 << 16) | (0x9870 >> 2),
+ 0x00000000,
+ (0x8041 << 16) | (0x9874 >> 2),
+ 0x00000000,
+ 0x00000000
+};
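
Each pair in the list above packs a 16-bit select value and a dword register offset into a single word, followed by a 0x00000000 placeholder. A quick standalone decode of the first entry, shown only to make the packing explicit (the exact meaning of the select half is not documented here, so the labels are illustrative):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* first entry of the list: (0x8000 << 16) | (0x98f4 >> 2) */
    uint32_t packed = (0x8000u << 16) | (0x98f4u >> 2);

    uint32_t select = packed >> 16;            /* 0x8000 */
    uint32_t offset = (packed & 0xffffu) << 2; /* byte offset 0x98f4 */

    printf("select=0x%x reg offset=0x%x\n", select, offset);
    return 0;
}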
+
+static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ int err;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct rlc_firmware_header_v1_0 *rlc_hdr;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_TAHITI:
+ chip_name = "tahiti";
+ break;
+ case CHIP_PITCAIRN:
+ chip_name = "pitcairn";
+ break;
+ case CHIP_VERDE:
+ chip_name = "verde";
+ break;
+ case CHIP_OLAND:
+ chip_name = "oland";
+ break;
+ case CHIP_HAINAN:
+ chip_name = "hainan";
+ break;
+ default: BUG();
+ }
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_pfp.bin", chip_name);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_me.bin", chip_name);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ce.bin", chip_name);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc.bin", chip_name);
+ if (err)
+ goto out;
+ rlc_hdr = (const struct rlc_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+
+out:
+ if (err) {
+ pr_err("gfx6: Failed to load firmware %s gfx firmware\n", chip_name);
+ amdgpu_ucode_release(&adev->gfx.pfp_fw);
+ amdgpu_ucode_release(&adev->gfx.me_fw);
+ amdgpu_ucode_release(&adev->gfx.ce_fw);
+ amdgpu_ucode_release(&adev->gfx.rlc_fw);
+ }
+ return err;
+}
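
gfx_v6_0_init_microcode() follows a request/parse/rollback shape: each blob is requested by name, its v1_0 header is read for version fields, and a single error path releases everything acquired so far. A minimal sketch of that shape, using a hypothetical load_one() helper rather than the real amdgpu_ucode_request()/amdgpu_ucode_release() API:

#include <stdio.h>

/* Hypothetical loader: returns 0 on success, nonzero on failure. */
static int load_one(const char *chip, const char *suffix)
{
    printf("requesting amdgpu/%s_%s.bin\n", chip, suffix);
    return 0; /* pretend the blob was found and its header parsed */
}

static int init_microcode(const char *chip)
{
    int err;

    /* request each blob in turn; any failure falls through to cleanup */
    if ((err = load_one(chip, "pfp")))
        goto out;
    if ((err = load_one(chip, "me")))
        goto out;
    if ((err = load_one(chip, "ce")))
        goto out;
    err = load_one(chip, "rlc");

out:
    if (err)
        printf("failed to load %s gfx firmware, releasing partial state\n", chip);
    return err;
}

int main(void)
{
    return init_microcode("verde");
}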
+
+static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
+{
+ const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ u32 reg_offset, split_equal_to_row_size, *tilemode;
+
+ memset(adev->gfx.config.tile_mode_array, 0, sizeof(adev->gfx.config.tile_mode_array));
+ tilemode = adev->gfx.config.tile_mode_array;
+
+ switch (adev->gfx.config.mem_row_size_in_kb) {
+ case 1:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
+ break;
+ case 2:
+ default:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
+ break;
+ case 4:
+ split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
+ break;
+ }
+
+ if (adev->asic_type == CHIP_VERDE) {
+ tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
+ tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK);
+ tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16);
+ tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK) |
+ TILE_SPLIT(split_equal_to_row_size);
+ tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK);
+ tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK);
+ tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);
+ tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+ ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+ BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_2_BANK);